(root)/
coreutils-9.4/
src/
fmt.c
       1  /* GNU fmt -- simple text formatter.
       2     Copyright (C) 1994-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
      18  
      19  #include <config.h>
      20  #include <stdio.h>
      21  #include <sys/types.h>
      22  #include <getopt.h>
      23  
      24  /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
      25     it to be a type get syntax errors for the variable declaration below.  */
      26  #define word unused_word_type
      27  
      28  #include "c-ctype.h"
      29  #include "system.h"
      30  #include "fadvise.h"
      31  #include "xdectoint.h"
      32  
      33  /* The official name of this program (e.g., no 'g' prefix).  */
      34  #define PROGRAM_NAME "fmt"
      35  
      36  #define AUTHORS proper_name ("Ross Paterson")
      37  
      38  /* The following parameters represent the program's idea of what is
      39     "best".  Adjust to taste, subject to the caveats given.  */
      40  
      41  /* Default longest permitted line length (max_width).  */
      42  #define WIDTH	75
      43  
      44  /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
      45     room for optimization.  */
      46  #define LEEWAY	7
      47  
      48  /* The default secondary indent of tagged paragraph used for unindented
      49     one-line paragraphs not preceded by any multi-line paragraphs.  */
      50  #define DEF_INDENT 3
      51  
      52  /* Costs and bonuses are expressed as the equivalent departure from the
      53     optimal line length, multiplied by 10.  e.g. assigning something a
      54     cost of 50 means that it is as bad as a line 5 characters too short
      55     or too long.  The definition of SHORT_COST(n) should not be changed.
      56     However, EQUIV(n) may need tuning.  */
      57  
      58  /* FIXME: "fmt" misbehaves given large inputs or options.  One
      59     possible workaround for part of the problem is to change COST to be
      60     a floating-point type.  There are other problems besides COST,
      61     though; see MAXWORDS below.  */
      62  
      63  typedef long int COST;
      64  
      65  #define MAXCOST	TYPE_MAXIMUM (COST)
      66  
      67  #define SQR(n)		((n) * (n))
      68  #define EQUIV(n)	SQR ((COST) (n))
      69  
      70  /* Cost of a filled line n chars longer or shorter than goal_width.  */
      71  #define SHORT_COST(n)	EQUIV ((n) * 10)
      72  
      73  /* Cost of the difference between adjacent filled lines.  */
      74  #define RAGGED_COST(n)	(SHORT_COST (n) / 2)
      75  
      76  /* Basic cost per line.  */
      77  #define LINE_COST	EQUIV (70)
      78  
      79  /* Cost of breaking a line after the first word of a sentence, where
      80     the length of the word is N.  */
      81  #define WIDOW_COST(n)	(EQUIV (200) / ((n) + 2))
      82  
      83  /* Cost of breaking a line before the last word of a sentence, where
      84     the length of the word is N.  */
      85  #define ORPHAN_COST(n)	(EQUIV (150) / ((n) + 2))
      86  
      87  /* Bonus for breaking a line at the end of a sentence.  */
      88  #define SENTENCE_BONUS	EQUIV (50)
      89  
      90  /* Cost of breaking a line after a period not marking end of a sentence.
      91     With the definition of sentence we are using (borrowed from emacs, see
      92     get_line()) such a break would then look like a sentence break.  Hence
      93     we assign a very high cost -- it should be avoided unless things are
      94     really bad.  */
      95  #define NOBREAK_COST	EQUIV (600)
      96  
      97  /* Bonus for breaking a line before open parenthesis.  */
      98  #define PAREN_BONUS	EQUIV (40)
      99  
     100  /* Bonus for breaking a line after other punctuation.  */
     101  #define PUNCT_BONUS	EQUIV(40)
     102  
     103  /* Credit for breaking a long paragraph one line later.  */
     104  #define LINE_CREDIT	EQUIV(3)
     105  
     106  /* Size of paragraph buffer, in words and characters.  Longer paragraphs
     107     are handled neatly (cf. flush_paragraph()), so long as these values
     108     are considerably greater than required by the width.  These values
     109     cannot be extended indefinitely: doing so would run into size limits
     110     and/or cause more overflows in cost calculations.  FIXME: Remove these
     111     arbitrary limits.  */
     112  
     113  #define MAXWORDS	1000
     114  #define MAXCHARS	5000
     115  
     116  /* Extra ctype(3)-style macros.  */
     117  
     118  #define isopen(c)	(strchr ("(['`\"", c) != nullptr)
     119  #define isclose(c)	(strchr (")]'\"", c) != nullptr)
     120  #define isperiod(c)	(strchr (".?!", c) != nullptr)
     121  
     122  /* Size of a tab stop, for expansion on input and re-introduction on
     123     output.  */
     124  #define TABWIDTH	8
     125  
     126  /* Word descriptor structure.  */
     127  
     128  typedef struct Word WORD;
     129  
     130  struct Word
     131    {
     132  
     133      /* Static attributes determined during input.  */
     134  
     135      char const *text;		/* the text of the word */
     136      int length;			/* length of this word */
     137      int space;			/* the size of the following space */
     138      unsigned int paren:1;	/* starts with open paren */
     139      unsigned int period:1;	/* ends in [.?!])* */
     140      unsigned int punct:1;	/* ends in punctuation */
     141      unsigned int final:1;	/* end of sentence */
     142  
     143      /* The remaining fields are computed during the optimization.  */
     144  
     145      int line_length;		/* length of the best line starting here */
     146      COST best_cost;		/* cost of best paragraph starting here */
     147      WORD *next_break;		/* break which achieves best_cost */
     148    };
     149  
     150  /* Forward declarations.  */
     151  
     152  static void set_prefix (char *p);
     153  static bool fmt (FILE *f, char const *);
     154  static bool get_paragraph (FILE *f);
     155  static int get_line (FILE *f, int c);
     156  static int get_prefix (FILE *f);
     157  static int get_space (FILE *f, int c);
     158  static int copy_rest (FILE *f, int c);
     159  static bool same_para (int c);
     160  static void flush_paragraph (void);
     161  static void fmt_paragraph (void);
     162  static void check_punctuation (WORD *w);
     163  static COST base_cost (WORD *this);
     164  static COST line_cost (WORD *next, int len);
     165  static void put_paragraph (WORD *finish);
     166  static void put_line (WORD *w, int indent);
     167  static void put_word (WORD *w);
     168  static void put_space (int space);
     169  
     170  /* Option values.  */
     171  
     172  /* If true, first 2 lines may have different indent (default false).  */
     173  static bool crown;
     174  
     175  /* If true, first 2 lines _must_ have different indent (default false).  */
     176  static bool tagged;
     177  
     178  /* If true, each line is a paragraph on its own (default false).  */
     179  static bool split;
     180  
     181  /* If true, don't preserve inter-word spacing (default false).  */
     182  static bool uniform;
     183  
     184  /* Prefix minus leading and trailing spaces (default "").  */
     185  static char const *prefix;
     186  
     187  /* User-supplied maximum line width (default WIDTH).  The only output
     188     lines longer than this will each comprise a single word.  */
     189  static int max_width;
     190  
     191  /* Values derived from the option values.  */
     192  
     193  /* The length of prefix minus leading space.  */
     194  static int prefix_full_length;
     195  
     196  /* The length of the leading space trimmed from the prefix.  */
     197  static int prefix_lead_space;
     198  
     199  /* The length of prefix minus leading and trailing space.  */
     200  static int prefix_length;
     201  
     202  /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
     203  static int goal_width;
     204  
     205  /* Dynamic variables.  */
     206  
     207  /* Start column of the character most recently read from the input file.  */
     208  static int in_column;
     209  
     210  /* Start column of the next character to be written to stdout.  */
     211  static int out_column;
     212  
     213  /* Space for the paragraph text -- longer paragraphs are handled neatly
     214     (cf. flush_paragraph()).  */
     215  static char parabuf[MAXCHARS];
     216  
     217  /* A pointer into parabuf, indicating the first unused character position.  */
     218  static char *wptr;
     219  
     220  /* The words of a paragraph -- longer paragraphs are handled neatly
     221     (cf. flush_paragraph()).  */
     222  static WORD word[MAXWORDS];
     223  
     224  /* A pointer into the above word array, indicating the first position
     225     after the last complete word.  Sometimes it will point at an incomplete
     226     word.  */
     227  static WORD *word_limit;
     228  
     229  /* If true, current input file contains tab characters, and so tabs can be
     230     used for white space on output.  */
     231  static bool tabs;
     232  
     233  /* Space before trimmed prefix on each line of the current paragraph.  */
     234  static int prefix_indent;
     235  
     236  /* Indentation of the first line of the current paragraph.  */
     237  static int first_indent;
     238  
     239  /* Indentation of other lines of the current paragraph */
     240  static int other_indent;
     241  
     242  /* To detect the end of a paragraph, we need to look ahead to the first
     243     non-blank character after the prefix on the next line, or the first
     244     character on the following line that failed to match the prefix.
     245     We can reconstruct the lookahead from that character (next_char), its
     246     position on the line (in_column) and the amount of space before the
     247     prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
     248  
     249  /* The last character read from the input file.  */
     250  static int next_char;
     251  
     252  /* The space before the trimmed prefix (or part of it) on the next line
     253     after the current paragraph.  */
     254  static int next_prefix_indent;
     255  
     256  /* If nonzero, the length of the last line output in the current
     257     paragraph, used to charge for raggedness at the split point for long
     258     paragraphs chosen by fmt_paragraph().  */
     259  static int last_line_length;
     260  
     261  void
     262  usage (int status)
     263  {
     264    if (status != EXIT_SUCCESS)
     265      emit_try_help ();
     266    else
     267      {
     268        printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name);
     269        fputs (_("\
     270  Reformat each paragraph in the FILE(s), writing to standard output.\n\
     271  The option -WIDTH is an abbreviated form of --width=DIGITS.\n\
     272  "), stdout);
     273  
     274        emit_stdin_note ();
     275        emit_mandatory_arg_note ();
     276  
     277        fputs (_("\
     278    -c, --crown-margin        preserve indentation of first two lines\n\
     279    -p, --prefix=STRING       reformat only lines beginning with STRING,\n\
     280                                reattaching the prefix to reformatted lines\n\
     281    -s, --split-only          split long lines, but do not refill\n\
     282  "),
     283               stdout);
     284        /* Tell xgettext that the "% o" below is not a printf-style
     285           format string:  xgettext:no-c-format */
     286        fputs (_("\
     287    -t, --tagged-paragraph    indentation of first line different from second\n\
     288    -u, --uniform-spacing     one space between words, two after sentences\n\
     289    -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
     290    -g, --goal=WIDTH          goal width (default of 93% of width)\n\
     291  "), stdout);
     292        fputs (HELP_OPTION_DESCRIPTION, stdout);
     293        fputs (VERSION_OPTION_DESCRIPTION, stdout);
     294        emit_ancillary_info (PROGRAM_NAME);
     295      }
     296    exit (status);
     297  }
     298  
     299  /* Decode options and launch execution.  */
     300  
     301  static struct option const long_options[] =
     302  {
     303    {"crown-margin", no_argument, nullptr, 'c'},
     304    {"prefix", required_argument, nullptr, 'p'},
     305    {"split-only", no_argument, nullptr, 's'},
     306    {"tagged-paragraph", no_argument, nullptr, 't'},
     307    {"uniform-spacing", no_argument, nullptr, 'u'},
     308    {"width", required_argument, nullptr, 'w'},
     309    {"goal", required_argument, nullptr, 'g'},
     310    {GETOPT_HELP_OPTION_DECL},
     311    {GETOPT_VERSION_OPTION_DECL},
     312    {nullptr, 0, nullptr, 0},
     313  };
     314  
     315  int
     316  main (int argc, char **argv)
     317  {
     318    int optchar;
     319    bool ok = true;
     320    char const *max_width_option = nullptr;
     321    char const *goal_width_option = nullptr;
     322  
     323    initialize_main (&argc, &argv);
     324    set_program_name (argv[0]);
     325    setlocale (LC_ALL, "");
     326    bindtextdomain (PACKAGE, LOCALEDIR);
     327    textdomain (PACKAGE);
     328  
     329    atexit (close_stdout);
     330  
     331    crown = tagged = split = uniform = false;
     332    max_width = WIDTH;
     333    prefix = "";
     334    prefix_length = prefix_lead_space = prefix_full_length = 0;
     335  
     336    if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     337      {
     338        /* Old option syntax; a dash followed by one or more digits.  */
     339        max_width_option = argv[1] + 1;
     340  
     341        /* Make the option we just parsed invisible to getopt.  */
     342        argv[1] = argv[0];
     343        argv++;
     344        argc--;
     345      }
     346  
     347    while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:",
     348                                   long_options, nullptr))
     349           != -1)
     350      switch (optchar)
     351        {
     352        default:
     353          if (ISDIGIT (optchar))
     354            error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
     355   only when it is the first\noption; use -w N instead"),
     356                   optchar);
     357          usage (EXIT_FAILURE);
     358  
     359        case 'c':
     360          crown = true;
     361          break;
     362  
     363        case 's':
     364          split = true;
     365          break;
     366  
     367        case 't':
     368          tagged = true;
     369          break;
     370  
     371        case 'u':
     372          uniform = true;
     373          break;
     374  
     375        case 'w':
     376          max_width_option = optarg;
     377          break;
     378  
     379        case 'g':
     380          goal_width_option = optarg;
     381          break;
     382  
     383        case 'p':
     384          set_prefix (optarg);
     385          break;
     386  
     387        case_GETOPT_HELP_CHAR;
     388  
     389        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     390  
     391        }
     392  
     393    if (max_width_option)
     394      {
     395        /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
     396           output can be quite ugly.  */
     397        max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "",
     398                                _("invalid width"), 0);
     399      }
     400  
     401    if (goal_width_option)
     402      {
     403        /* Limit goal_width to max_width.  */
     404        goal_width = xdectoumax (goal_width_option, 0, max_width, "",
     405                                 _("invalid width"), 0);
     406        if (max_width_option == nullptr)
     407          max_width = goal_width + 10;
     408      }
     409    else
     410      {
     411        goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
     412      }
     413  
     414    bool have_read_stdin = false;
     415  
     416    if (optind == argc)
     417      {
     418        have_read_stdin = true;
     419        ok = fmt (stdin, "-");
     420      }
     421    else
     422      {
     423        for (; optind < argc; optind++)
     424          {
     425            char *file = argv[optind];
     426            if (STREQ (file, "-"))
     427              {
     428                ok &= fmt (stdin, file);
     429                have_read_stdin = true;
     430              }
     431            else
     432              {
     433                FILE *in_stream;
     434                in_stream = fopen (file, "r");
     435                if (in_stream != nullptr)
     436                  ok &= fmt (in_stream, file);
     437                else
     438                  {
     439                    error (0, errno, _("cannot open %s for reading"),
     440                           quoteaf (file));
     441                    ok = false;
     442                  }
     443              }
     444          }
     445      }
     446  
     447    if (have_read_stdin && fclose (stdin) != 0)
     448      error (EXIT_FAILURE, errno, "%s", _("closing standard input"));
     449  
     450    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
     451  }
     452  
     453  /* Trim space from the front and back of the string P, yielding the prefix,
     454     and record the lengths of the prefix and the space trimmed.  */
     455  
     456  static void
     457  set_prefix (char *p)
     458  {
     459    char *s;
     460  
     461    prefix_lead_space = 0;
     462    while (*p == ' ')
     463      {
     464        prefix_lead_space++;
     465        p++;
     466      }
     467    prefix = p;
     468    prefix_full_length = strlen (p);
     469    s = p + prefix_full_length;
     470    while (s > p && s[-1] == ' ')
     471      s--;
     472    *s = '\0';
     473    prefix_length = s - p;
     474  }
     475  
     476  /* Read F and send formatted output to stdout.
     477     Close F when done, unless F is stdin.  Diagnose input errors, using FILE.
     478     If !F, assume F resulted from an fopen failure and diagnose that.
     479     Return true if successful.  */
     480  
     481  static bool
     482  fmt (FILE *f, char const *file)
     483  {
     484    fadvise (f, FADVISE_SEQUENTIAL);
     485    tabs = false;
     486    other_indent = 0;
     487    next_char = get_prefix (f);
     488    while (get_paragraph (f))
     489      {
     490        fmt_paragraph ();
     491        put_paragraph (word_limit);
     492      }
     493  
     494    int err = ferror (f) ? 0 : -1;
     495    if (f == stdin)
     496      clearerr (f);
     497    else if (fclose (f) != 0 && err < 0)
     498      err = errno;
     499    if (0 <= err)
     500      error (0, err, err ? "%s" : _("read error"), quotef (file));
     501    return err < 0;
     502  }
     503  
     504  /* Set the global variable 'other_indent' according to SAME_PARAGRAPH
     505     and other global variables.  */
     506  
     507  static void
     508  set_other_indent (bool same_paragraph)
     509  {
     510    if (split)
     511      other_indent = first_indent;
     512    else if (crown)
     513      {
     514        other_indent = (same_paragraph ? in_column : first_indent);
     515      }
     516    else if (tagged)
     517      {
     518        if (same_paragraph && in_column != first_indent)
     519          {
     520            other_indent = in_column;
     521          }
     522  
     523        /* Only one line: use the secondary indent from last time if it
     524           splits, or 0 if there have been no multi-line paragraphs in the
     525           input so far.  But if these rules make the two indents the same,
     526           pick a new secondary indent.  */
     527  
     528        else if (other_indent == first_indent)
     529          other_indent = first_indent == 0 ? DEF_INDENT : 0;
     530      }
     531    else
     532      {
     533        other_indent = first_indent;
     534      }
     535  }
     536  
     537  /* Read a paragraph from input file F.  A paragraph consists of a
     538     maximal number of non-blank (excluding any prefix) lines subject to:
     539     * In split mode, a paragraph is a single non-blank line.
     540     * In crown mode, the second and subsequent lines must have the
     541     same indentation, but possibly different from the indent of the
     542     first line.
     543     * Tagged mode is similar, but the first and second lines must have
     544     different indentations.
     545     * Otherwise, all lines of a paragraph must have the same indent.
     546     If a prefix is in effect, it must be present at the same indent for
     547     each line in the paragraph.
     548  
     549     Return false if end-of-file was encountered before the start of a
     550     paragraph, else true.  */
     551  
     552  static bool
     553  get_paragraph (FILE *f)
     554  {
     555    int c;
     556  
     557    last_line_length = 0;
     558    c = next_char;
     559  
     560    /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
     561  
     562    while (c == '\n' || c == EOF
     563           || next_prefix_indent < prefix_lead_space
     564           || in_column < next_prefix_indent + prefix_full_length)
     565      {
     566        c = copy_rest (f, c);
     567        if (c == EOF)
     568          {
     569            next_char = EOF;
     570            return false;
     571          }
     572        putchar ('\n');
     573        c = get_prefix (f);
     574      }
     575  
     576    /* Got a suitable first line for a paragraph.  */
     577  
     578    prefix_indent = next_prefix_indent;
     579    first_indent = in_column;
     580    wptr = parabuf;
     581    word_limit = word;
     582    c = get_line (f, c);
     583    set_other_indent (same_para (c));
     584  
     585    /* Read rest of paragraph (unless split is specified).  */
     586  
     587    if (split)
     588      {
     589        /* empty */
     590      }
     591    else if (crown)
     592      {
     593        if (same_para (c))
     594          {
     595            do
     596              {			/* for each line till the end of the para */
     597                c = get_line (f, c);
     598              }
     599            while (same_para (c) && in_column == other_indent);
     600          }
     601      }
     602    else if (tagged)
     603      {
     604        if (same_para (c) && in_column != first_indent)
     605          {
     606            do
     607              {			/* for each line till the end of the para */
     608                c = get_line (f, c);
     609              }
     610            while (same_para (c) && in_column == other_indent);
     611          }
     612      }
     613    else
     614      {
     615        while (same_para (c) && in_column == other_indent)
     616          c = get_line (f, c);
     617      }
     618  
     619    (word_limit - 1)->period = (word_limit - 1)->final = true;
     620    next_char = c;
     621    return true;
     622  }
     623  
     624  /* Copy to the output a line that failed to match the prefix, or that
     625     was blank after the prefix.  In the former case, C is the character
     626     that failed to match the prefix.  In the latter, C is \n or EOF.
     627     Return the character (\n or EOF) ending the line.  */
     628  
     629  static int
     630  copy_rest (FILE *f, int c)
     631  {
     632    char const *s;
     633  
     634    out_column = 0;
     635    if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
     636      {
     637        put_space (next_prefix_indent);
     638        for (s = prefix; out_column != in_column && *s; out_column++)
     639          putchar (*s++);
     640        if (c != EOF && c != '\n')
     641          put_space (in_column - out_column);
     642        if (c == EOF && in_column >= next_prefix_indent + prefix_length)
     643          putchar ('\n');
     644      }
     645    while (c != '\n' && c != EOF)
     646      {
     647        putchar (c);
     648        c = getc (f);
     649      }
     650    return c;
     651  }
     652  
     653  /* Return true if a line whose first non-blank character after the
     654     prefix (if any) is C could belong to the current paragraph,
     655     otherwise false.  */
     656  
     657  static bool
     658  same_para (int c)
     659  {
     660    return (next_prefix_indent == prefix_indent
     661            && in_column >= next_prefix_indent + prefix_full_length
     662            && c != '\n' && c != EOF);
     663  }
     664  
     665  /* Read a line from input file F, given first non-blank character C
     666     after the prefix, and the following indent, and break it into words.
     667     A word is a maximal non-empty string of non-white characters.  A word
     668     ending in [.?!][])"']* and followed by end-of-line or at least two
     669     spaces ends a sentence, as in emacs.
     670  
     671     Return the first non-blank character of the next line.  */
     672  
     673  static int
     674  get_line (FILE *f, int c)
     675  {
     676    int start;
     677    char *end_of_parabuf;
     678    WORD *end_of_word;
     679  
     680    end_of_parabuf = &parabuf[MAXCHARS];
     681    end_of_word = &word[MAXWORDS - 2];
     682  
     683    do
     684      {				/* for each word in a line */
     685  
     686        /* Scan word.  */
     687  
     688        word_limit->text = wptr;
     689        do
     690          {
     691            if (wptr == end_of_parabuf)
     692              {
     693                set_other_indent (true);
     694                flush_paragraph ();
     695              }
     696            *wptr++ = c;
     697            c = getc (f);
     698          }
     699        while (c != EOF && !c_isspace (c));
     700        in_column += word_limit->length = wptr - word_limit->text;
     701        check_punctuation (word_limit);
     702  
     703        /* Scan inter-word space.  */
     704  
     705        start = in_column;
     706        c = get_space (f, c);
     707        word_limit->space = in_column - start;
     708        word_limit->final = (c == EOF
     709                             || (word_limit->period
     710                                 && (c == '\n' || word_limit->space > 1)));
     711        if (c == '\n' || c == EOF || uniform)
     712          word_limit->space = word_limit->final ? 2 : 1;
     713        if (word_limit == end_of_word)
     714          {
     715            set_other_indent (true);
     716            flush_paragraph ();
     717          }
     718        word_limit++;
     719      }
     720    while (c != '\n' && c != EOF);
     721    return get_prefix (f);
     722  }
     723  
     724  /* Read a prefix from input file F.  Return either first non-matching
     725     character, or first non-blank character after the prefix.  */
     726  
     727  static int
     728  get_prefix (FILE *f)
     729  {
     730    int c;
     731  
     732    in_column = 0;
     733    c = get_space (f, getc (f));
     734    if (prefix_length == 0)
     735      next_prefix_indent = prefix_lead_space < in_column ?
     736        prefix_lead_space : in_column;
     737    else
     738      {
     739        char const *p;
     740        next_prefix_indent = in_column;
     741        for (p = prefix; *p != '\0'; p++)
     742          {
     743            unsigned char pc = *p;
     744            if (c != pc)
     745              return c;
     746            in_column++;
     747            c = getc (f);
     748          }
     749        c = get_space (f, c);
     750      }
     751    return c;
     752  }
     753  
     754  /* Read blank characters from input file F, starting with C, and keeping
     755     in_column up-to-date.  Return first non-blank character.  */
     756  
     757  static int
     758  get_space (FILE *f, int c)
     759  {
     760    while (true)
     761      {
     762        if (c == ' ')
     763          in_column++;
     764        else if (c == '\t')
     765          {
     766            tabs = true;
     767            in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
     768          }
     769        else
     770          return c;
     771        c = getc (f);
     772      }
     773  }
     774  
     775  /* Set extra fields in word W describing any attached punctuation.  */
     776  
     777  static void
     778  check_punctuation (WORD *w)
     779  {
     780    char const *start = w->text;
     781    char const *finish = start + (w->length - 1);
     782    unsigned char fin = *finish;
     783  
     784    w->paren = isopen (*start);
     785    w->punct = !! ispunct (fin);
     786    while (start < finish && isclose (*finish))
     787      finish--;
     788    w->period = isperiod (*finish);
     789  }
     790  
     791  /* Flush part of the paragraph to make room.  This function is called on
     792     hitting the limit on the number of words or characters.  */
     793  
     794  static void
     795  flush_paragraph (void)
     796  {
     797    WORD *split_point;
     798    WORD *w;
     799    int shift;
     800    COST best_break;
     801  
     802    /* In the special case where it's all one word, just flush it.  */
     803  
     804    if (word_limit == word)
     805      {
     806        fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
     807        wptr = parabuf;
     808        return;
     809      }
     810  
     811    /* Otherwise:
     812       - format what you have so far as a paragraph,
     813       - find a low-cost line break near the end,
     814       - output to there,
     815       - make that the start of the paragraph.  */
     816  
     817    fmt_paragraph ();
     818  
     819    /* Choose a good split point.  */
     820  
     821    split_point = word_limit;
     822    best_break = MAXCOST;
     823    for (w = word->next_break; w != word_limit; w = w->next_break)
     824      {
     825        if (w->best_cost - w->next_break->best_cost < best_break)
     826          {
     827            split_point = w;
     828            best_break = w->best_cost - w->next_break->best_cost;
     829          }
     830        if (best_break <= MAXCOST - LINE_CREDIT)
     831          best_break += LINE_CREDIT;
     832      }
     833    put_paragraph (split_point);
     834  
     835    /* Copy text of words down to start of parabuf -- we use memmove because
     836       the source and target may overlap.  */
     837  
     838    memmove (parabuf, split_point->text, wptr - split_point->text);
     839    shift = split_point->text - parabuf;
     840    wptr -= shift;
     841  
     842    /* Adjust text pointers.  */
     843  
     844    for (w = split_point; w <= word_limit; w++)
     845      w->text -= shift;
     846  
     847    /* Copy words from split_point down to word -- we use memmove because
     848       the source and target may overlap.  */
     849  
     850    memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
     851    word_limit -= split_point - word;
     852  }
     853  
     854  /* Compute the optimal formatting for the whole paragraph by computing
     855     and remembering the optimal formatting for each suffix from the empty
     856     one to the whole paragraph.  */
     857  
     858  static void
     859  fmt_paragraph (void)
     860  {
     861    WORD *start, *w;
     862    int len;
     863    COST wcost, best;
     864    int saved_length;
     865  
     866    word_limit->best_cost = 0;
     867    saved_length = word_limit->length;
     868    word_limit->length = max_width;	/* sentinel */
     869  
     870    for (start = word_limit - 1; start >= word; start--)
     871      {
     872        best = MAXCOST;
     873        len = start == word ? first_indent : other_indent;
     874  
     875        /* At least one word, however long, in the line.  */
     876  
     877        w = start;
     878        len += w->length;
     879        do
     880          {
     881            w++;
     882  
     883            /* Consider breaking before w.  */
     884  
     885            wcost = line_cost (w, len) + w->best_cost;
     886            if (start == word && last_line_length > 0)
     887              wcost += RAGGED_COST (len - last_line_length);
     888            if (wcost < best)
     889              {
     890                best = wcost;
     891                start->next_break = w;
     892                start->line_length = len;
     893              }
     894  
     895            /* This is a kludge to keep us from computing 'len' as the
     896               sum of the sentinel length and some non-zero number.
     897               Since the sentinel w->length may be INT_MAX, adding
     898               to that would give a negative result.  */
     899            if (w == word_limit)
     900              break;
     901  
     902            len += (w - 1)->space + w->length;	/* w > start >= word */
     903          }
     904        while (len < max_width);
     905        start->best_cost = best + base_cost (start);
     906      }
     907  
     908    word_limit->length = saved_length;
     909  }
     910  
     911  /* Work around <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109628>.  */
     912  #if 13 <= __GNUC__
     913  # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
     914  #endif
     915  
     916  /* Return the constant component of the cost of breaking before the
     917     word THIS.  */
     918  
     919  static COST
     920  base_cost (WORD *this)
     921  {
     922    COST cost;
     923  
     924    cost = LINE_COST;
     925  
     926    if (this > word)
     927      {
     928        if ((this - 1)->period)
     929          {
     930            if ((this - 1)->final)
     931              cost -= SENTENCE_BONUS;
     932            else
     933              cost += NOBREAK_COST;
     934          }
     935        else if ((this - 1)->punct)
     936          cost -= PUNCT_BONUS;
     937        else if (this > word + 1 && (this - 2)->final)
     938          cost += WIDOW_COST ((this - 1)->length);
     939      }
     940  
     941    if (this->paren)
     942      cost -= PAREN_BONUS;
     943    else if (this->final)
     944      cost += ORPHAN_COST (this->length);
     945  
     946    return cost;
     947  }
     948  
     949  /* Return the component of the cost of breaking before word NEXT that
     950     depends on LEN, the length of the line beginning there.  */
     951  
     952  static COST
     953  line_cost (WORD *next, int len)
     954  {
     955    int n;
     956    COST cost;
     957  
     958    if (next == word_limit)
     959      return 0;
     960    n = goal_width - len;
     961    cost = SHORT_COST (n);
     962    if (next->next_break != word_limit)
     963      {
     964        n = len - next->line_length;
     965        cost += RAGGED_COST (n);
     966      }
     967    return cost;
     968  }
     969  
     970  /* Output to stdout a paragraph from word up to (but not including)
     971     FINISH, which must be in the next_break chain from word.  */
     972  
     973  static void
     974  put_paragraph (WORD *finish)
     975  {
     976    WORD *w;
     977  
     978    put_line (word, first_indent);
     979    for (w = word->next_break; w != finish; w = w->next_break)
     980      put_line (w, other_indent);
     981  }
     982  
     983  /* Output to stdout the line beginning with word W, beginning in column
     984     INDENT, including the prefix (if any).  */
     985  
     986  static void
     987  put_line (WORD *w, int indent)
     988  {
     989    WORD *endline;
     990  
     991    out_column = 0;
     992    put_space (prefix_indent);
     993    fputs (prefix, stdout);
     994    out_column += prefix_length;
     995    put_space (indent - out_column);
     996  
     997    endline = w->next_break - 1;
     998    for (; w != endline; w++)
     999      {
    1000        put_word (w);
    1001        put_space (w->space);
    1002      }
    1003    put_word (w);
    1004    last_line_length = out_column;
    1005    putchar ('\n');
    1006  }
    1007  
    1008  /* Output to stdout the word W.  */
    1009  
    1010  static void
    1011  put_word (WORD *w)
    1012  {
    1013    char const *s;
    1014    int n;
    1015  
    1016    s = w->text;
    1017    for (n = w->length; n != 0; n--)
    1018      putchar (*s++);
    1019    out_column += w->length;
    1020  }
    1021  
    1022  /* Output to stdout SPACE spaces, or equivalent tabs.  */
    1023  
    1024  static void
    1025  put_space (int space)
    1026  {
    1027    int space_target, tab_target;
    1028  
    1029    space_target = out_column + space;
    1030    if (tabs)
    1031      {
    1032        tab_target = space_target / TABWIDTH * TABWIDTH;
    1033        if (out_column + 1 < tab_target)
    1034          while (out_column < tab_target)
    1035            {
    1036              putchar ('\t');
    1037              out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
    1038            }
    1039      }
    1040    while (out_column < space_target)
    1041      {
    1042        putchar (' ');
    1043        out_column++;
    1044      }
    1045  }