1  /* xgettext sh backend.
       2     Copyright (C) 2003, 2005-2009, 2014, 2018-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2003.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include "config.h"
      20  #endif
      21  
      22  /* Specification.  */
      23  #include "x-sh.h"
      24  
      25  #include <errno.h>
      26  #include <limits.h>
      27  #include <stdbool.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  
      32  #include "attribute.h"
      33  #include "message.h"
      34  #include "xgettext.h"
      35  #include "xg-pos.h"
      36  #include "xg-mixed-string.h"
      37  #include "xg-arglist-context.h"
      38  #include "xg-arglist-callshape.h"
      39  #include "xg-arglist-parser.h"
      40  #include "xg-message.h"
      41  #include "error.h"
      42  #include "error-progname.h"
      43  #include "xalloc.h"
      44  #include "mem-hash-map.h"
      45  #include "../../gettext-runtime/src/escapes.h"
      46  #include "gettext.h"
      47  
      48  #define _(s) gettext(s)
      49  
      50  #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
      51  
      52  
      53  /* The sh syntax is defined in POSIX:2001, see
      54       http://www.opengroup.org/onlinepubs/007904975/utilities/xcu_chap02.html
      55     Summary of sh syntax:
      56     - Input is broken into words, which are then subject to
      57       - tilde expansion ~...
      58       - command substitution `...`
      59       - variable substitution $var
      60       - arithmetic substitution $((...))
      61       - field splitting at whitespace (IFS)
      62       - wildcard pattern expansion *?
      63       - quote removal
      64     - Strings are enclosed in "..."; command substitution, variable
      65       substitution and arithmetic substitution are performed here as well.
      66     - '...' is a string without substitutions.
      67     - The list of resulting words is split into commands by semicolon and
      68       newline.
      69     - '#' at the beginning of a word introduces a comment until end of line.
      70     The parser is implemented in bash-2.05b/parse.y.  */
      71  
      72  
      73  /* ====================== Keyword set customization.  ====================== */
      74  
/* If true extract all strings.  Set by x_sh_extract_all.  */
static bool extract_all = false;

/* Hash table holding the registered keywords together with their call
   shapes.  It is initialized lazily by x_sh_keyword on first use.  */
static hash_table keywords;
/* True as long as the built-in default keywords still have to be registered
   by init_keywords.  Cleared by x_sh_keyword (NULL) and by init_keywords
   itself once the defaults have been registered.  */
static bool default_keywords = true;
      80  
      81  
/* Turn on extraction of all strings by setting the file-level
   'extract_all' flag.  */
void
x_sh_extract_all ()
{
  extract_all = true;
}
      87  
      88  
      89  void
      90  x_sh_keyword (const char *name)
      91  {
      92    if (name == NULL)
      93      default_keywords = false;
      94    else
      95      {
      96        const char *end;
      97        struct callshape shape;
      98        const char *colon;
      99  
     100        if (keywords.table == NULL)
     101          hash_init (&keywords, 100);
     102  
     103        split_keywordspec (name, &end, &shape);
     104  
     105        /* The characters between name and end should form a valid C identifier.
     106           A colon means an invalid parse in split_keywordspec().  */
     107        colon = strchr (name, ':');
     108        if (colon == NULL || colon >= end)
     109          insert_keyword_callshape (&keywords, name, end - name, &shape);
     110      }
     111  }
     112  
     113  /* Finish initializing the keywords hash table.
     114     Called after argument processing, before each file is processed.  */
     115  static void
     116  init_keywords ()
     117  {
     118    if (default_keywords)
     119      {
     120        /* When adding new keywords here, also update the documentation in
     121           xgettext.texi!  */
     122        x_sh_keyword ("gettext");
     123        x_sh_keyword ("ngettext:1,2");
     124        /* Note: There is also special handling for 'gettext' and 'ngettext'
     125           in read_command, below.  */
     126        x_sh_keyword ("eval_gettext");
     127        x_sh_keyword ("eval_ngettext:1,2");
     128        x_sh_keyword ("eval_pgettext:1c,2");
     129        x_sh_keyword ("eval_npgettext:1c,2,3");
     130        default_keywords = false;
     131      }
     132  }
     133  
     134  void
     135  init_flag_table_sh ()
     136  {
     137    xgettext_record_flag ("gettext:1:pass-sh-format");
     138    xgettext_record_flag ("ngettext:1:pass-sh-format");
     139    xgettext_record_flag ("ngettext:2:pass-sh-format");
     140    xgettext_record_flag ("eval_gettext:1:sh-format");
     141    xgettext_record_flag ("eval_ngettext:1:sh-format");
     142    xgettext_record_flag ("eval_ngettext:2:sh-format");
     143    xgettext_record_flag ("eval_pgettext:2:sh-format");
     144    xgettext_record_flag ("eval_npgettext:2:sh-format");
     145    xgettext_record_flag ("eval_npgettext:3:sh-format");
     146  }
     147  
     148  
     149  /* ======================== Reading of characters.  ======================== */
     150  
     151  /* The input file stream.  */
     152  static FILE *fp;
     153  
     154  
     155  /* Fetch the next character from the input file.  */
     156  static int
     157  do_getc ()
     158  {
     159    int c = getc (fp);
     160  
     161    if (c == EOF)
     162      {
     163        if (ferror (fp))
     164          error (EXIT_FAILURE, errno,
     165                 _("error while reading \"%s\""), real_file_name);
     166      }
     167    else if (c == '\n')
     168     line_number++;
     169  
     170    return c;
     171  }
     172  
     173  /* Put back the last fetched character, not EOF.  */
     174  static void
     175  do_ungetc (int c)
     176  {
     177    if (c == '\n')
     178      line_number--;
     179    ungetc (c, fp);
     180  }
     181  
     182  
     183  /* Remove backslash followed by newline from the input stream.  */
     184  
     185  static int phase1_pushback[2];
     186  static int phase1_pushback_length;
     187  
     188  static int
     189  phase1_getc ()
     190  {
     191    int c;
     192  
     193    if (phase1_pushback_length)
     194      {
     195        c = phase1_pushback[--phase1_pushback_length];
     196        if (c == '\n')
     197          ++line_number;
     198        return c;
     199      }
     200    for (;;)
     201      {
     202        c = do_getc ();
     203        if (c != '\\')
     204          return c;
     205        c = do_getc ();
     206        if (c != '\n')
     207          {
     208            if (c != EOF)
     209              do_ungetc (c);
     210            return '\\';
     211          }
     212      }
     213  }
     214  
     215  /* Supports only one pushback character.  */
     216  static void
     217  phase1_ungetc (int c)
     218  {
     219    switch (c)
     220      {
     221      case EOF:
     222        break;
     223  
     224      case '\n':
     225        --line_number;
     226        FALLTHROUGH;
     227  
     228      default:
     229        if (phase1_pushback_length == SIZEOF (phase1_pushback))
     230          abort ();
     231        phase1_pushback[phase1_pushback_length++] = c;
     232        break;
     233      }
     234  }
     235  
     236  
     237  /* ========================== Reading of tokens.  ========================== */
     238  
     239  
/* A token consists of a sequence of characters, kept in a heap-allocated
   buffer that is not NUL terminated.  See init_token, grow_token,
   free_token and string_of_token.  */
struct token
{
  int allocated;                /* number of allocated chars */
  int charcount;                /* number of used chars */
  char *chars;                  /* the token's constituents */
};
     247  
     248  /* Initialize a 'struct token'.  */
     249  static inline void
     250  init_token (struct token *tp)
     251  {
     252    tp->allocated = 10;
     253    tp->chars = XNMALLOC (tp->allocated, char);
     254    tp->charcount = 0;
     255  }
     256  
/* Free the memory pointed to by a 'struct token', that is, its character
   buffer.  The 'struct token' object itself is not freed.  */
static inline void
free_token (struct token *tp)
{
  free (tp->chars);
}
     263  
     264  /* Ensure there is enough room in the token for one more character.  */
     265  static inline void
     266  grow_token (struct token *tp)
     267  {
     268    if (tp->charcount == tp->allocated)
     269      {
     270        tp->allocated *= 2;
     271        tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
     272      }
     273  }
     274  
     275  /* Convert a struct token * to a char*.  */
     276  static char *
     277  string_of_token (const struct token *tp)
     278  {
     279    char *str;
     280    int n;
     281  
     282    n = tp->charcount;
     283    str = XNMALLOC (n + 1, char);
     284    memcpy (str, tp->chars, n);
     285    str[n] = '\0';
     286    return str;
     287  }
     288  
     289  
     290  /* ========================= Accumulating messages ========================= */
     291  
     292  
     293  static message_list_ty *mlp;
     294  
     295  
     296  /* ========================= Accumulating comments ========================= */
     297  
     298  
/* Buffer in which the text of the current comment line is accumulated.
   It is grown on demand by comment_add and comment_line_end.  */
static char *buffer;
/* Number of bytes allocated for buffer.  */
static size_t bufmax;
/* Number of bytes of buffer currently in use.  */
static size_t buflen;
     302  
/* Begin accumulating a new comment line: empty the buffer (the allocated
   storage is kept for reuse).  */
static inline void
comment_start ()
{
  buflen = 0;
}
     308  
     309  static inline void
     310  comment_add (int c)
     311  {
     312    if (buflen >= bufmax)
     313      {
     314        bufmax = 2 * bufmax + 10;
     315        buffer = xrealloc (buffer, bufmax);
     316      }
     317    buffer[buflen++] = c;
     318  }
     319  
     320  static inline void
     321  comment_line_end ()
     322  {
     323    while (buflen >= 1
     324           && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
     325      --buflen;
     326    if (buflen >= bufmax)
     327      {
     328        bufmax = 2 * bufmax + 10;
     329        buffer = xrealloc (buffer, bufmax);
     330      }
     331    buffer[buflen] = '\0';
     332    savable_comment_add (buffer);
     333  }
     334  
     335  
     336  /* These are for tracking whether comments count as immediately before
     337     keyword.  */
     338  static int last_comment_line;
     339  static int last_non_comment_line;
     340  
     341  
     342  /* ========================= Debackslashification ========================== */
     343  
     344  /* This state tracks the effect of backquotes, double-quotes and single-quotes
     345     on the parsing of backslashes.  We make a single pass through the input
     346     file, keeping the state up to date.  This is much faster than accumulating
     347     strings and processing them with explicit debackslashification, like the
     348     shell does it.  */
     349  
     350  /* The number of nested `...` or "`...`" constructs.  Assumed to be <= 32.  */
     351  static unsigned int nested_backquotes;
     352  
     353  /* A bit mask indicating which of the currently open `...` or "`...`"
     354     constructs is with double-quotes: "`...`".
     355     A bit value of 1 stands for "`...`", a bit value of 0 stands for `...`.
     356     Bit position 0 designates the outermost backquotes nesting,
     357     bit position 1 the second-outermost backquotes nesting,
     358     ...
     359     bit position (nested_backquotes-1) the innermost backquotes nesting.  */
     360  static unsigned int open_doublequotes_mask;
     361  
     362  /* A bit indicating whether a double-quote is currently open inside the
     363     innermost backquotes nesting.  */
     364  static bool open_doublequote;
     365  
     366  /* A bit indicating whether a single-quote is currently open inside the
     367     innermost backquotes nesting.  */
     368  static bool open_singlequote;
     369  
     370  /* The expected terminator of the currently open single-quote.
     371     Usually '\'', but can be '"' for i18n-quotes.  */
     372  static char open_singlequote_terminator;
     373  
     374  
     375  /* Functions to update the state.  */
     376  
     377  static inline void
     378  saw_opening_backquote ()
     379  {
     380    if (open_singlequote)
     381      abort ();
     382    if (open_doublequote)
     383      open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes;
     384    nested_backquotes++;
     385    open_doublequote = false;
     386  }
     387  
     388  static inline void
     389  saw_closing_backquote ()
     390  {
     391    nested_backquotes--;
     392    open_doublequote = (open_doublequotes_mask >> nested_backquotes) & 1;
     393    open_doublequotes_mask &= ((unsigned int) 1 << nested_backquotes) - 1;
     394    open_singlequote = false; /* just for safety */
     395  }
     396  
     397  static inline void
     398  saw_opening_doublequote ()
     399  {
     400    if (open_singlequote || open_doublequote)
     401      abort ();
     402    open_doublequote = true;
     403  }
     404  
     405  static inline void
     406  saw_closing_doublequote ()
     407  {
     408    if (open_singlequote || !open_doublequote)
     409      abort ();
     410    open_doublequote = false;
     411  }
     412  
     413  static inline void
     414  saw_opening_singlequote ()
     415  {
     416    if (open_doublequote || open_singlequote)
     417      abort ();
     418    open_singlequote = true;
     419    open_singlequote_terminator = '\'';
     420  }
     421  
     422  static inline void
     423  saw_closing_singlequote ()
     424  {
     425    if (open_doublequote || !open_singlequote)
     426      abort ();
     427    open_singlequote = false;
     428  }
     429  
     430  
     431  /* ========================== Reading of commands ========================== */
     432  
/* We are only interested in constant strings.  Other words need not be
   represented precisely.  */
enum word_type
{
  t_string,     /* constant string */
  t_assignment, /* variable assignment */
  t_other,      /* other string */
  t_separator,  /* command separator: semicolon or newline */
  t_redirect,   /* redirection: one of < > >| << <<- >> <> <& >& */
  t_backquote,  /* closing '`' pseudo word */
  t_paren,      /* closing ')' pseudo word */
  t_eof         /* EOF marker */
};
     446  
/* A word, as filled in by read_word.
   The 'token' member is heap-allocated and owned by the word; free_word
   releases it, but only when the type is t_string.  */
struct word
{
  enum word_type type;
  struct token *token;          /* for t_string */
  int line_number_at_start;     /* for t_string */
};
     453  
     454  /* Free the memory pointed to by a 'struct word'.  */
     455  static inline void
     456  free_word (struct word *wp)
     457  {
     458    if (wp->type == t_string)
     459      {
     460        free_token (wp->token);
     461        free (wp->token);
     462      }
     463  }
     464  
     465  /* Convert a t_string token to a char*.  */
     466  static char *
     467  string_of_word (const struct word *wp)
     468  {
     469    char *str;
     470    int n;
     471  
     472    if (!(wp->type == t_string))
     473      abort ();
     474    n = wp->token->charcount;
     475    str = XNMALLOC (n + 1, char);
     476    memcpy (str, wp->token->chars, n);
     477    str[n] = '\0';
     478    return str;
     479  }
     480  
     481  /* Convert a t_string token to a char*, ignoring the first OFFSET bytes.  */
     482  static char *
     483  substring_of_word (const struct word *wp, size_t offset)
     484  {
     485    char *str;
     486    int n;
     487  
     488    if (!(wp->type == t_string))
     489      abort ();
     490    n = wp->token->charcount;
     491    if (!(offset <= n))
     492      abort ();
     493    str = XNMALLOC (n - offset + 1, char);
     494    memcpy (str, wp->token->chars + offset, n - offset);
     495    str[n - offset] = '\0';
     496    return str;
     497  }
     498  
     499  
     500  /* Whitespace recognition.  */
     501  
     502  static inline bool
     503  is_whitespace (int c)
     504  {
     505    return (c == ' ' || c == '\t' || c == '\n');
     506  }
     507  
     508  /* Operator character recognition.  */
     509  
     510  static inline bool
     511  is_operator_start (int c)
     512  {
     513    return (c == '|' || c == '&' || c == ';' || c == '<' || c == '>'
     514            || c == '(' || c == ')');
     515  }
     516  
     517  
/* Denotation of a quoted character.
   The distinction between quoted and unquoted character is important only for
   the special, whitespace and operator characters; it is irrelevant for
   alphanumeric characters, '\\' and many others.
   QUOTED (c) lies just above the 'unsigned char' range, so it never compares
   equal to a plain character constant.  */
#define QUOTED(c) (UCHAR_MAX + 1 + (c))
/* Values in the 'unsigned char' range are implicitly unquoted.  Among these,
   the following are important:
     '"'         opening or closing double quote
     '\''        opening or closing single quote
     '$'         the unknown result of a dollar expansion
     '`'         does not occur - replaced with OPENING_BACKQUOTE or
                 CLOSING_BACKQUOTE
 */
/* The two backquote pseudo-characters are offset by 2 and 3 times
   (UCHAR_MAX + 1), respectively, so they are distinct from each other and
   from the plain '`' character.  */
#define OPENING_BACKQUOTE (2 * (UCHAR_MAX + 1) + '`')
#define CLOSING_BACKQUOTE (3 * (UCHAR_MAX + 1) + '`')
     533  
     534  /* 2 characters of pushback are supported.
     535     2 characters of pushback occur only when the first is an 'x'; in all
     536     other cases only one character of pushback is needed.  */
     537  static int phase2_pushback[2];
     538  static int phase2_pushback_length;
     539  
/* Return the next character, with backslashes removed.
   The result is QUOTED(c) for some unsigned char c, if the next character
   is escaped sufficiently often to make it a regular constituent character,
   or simply an 'unsigned char' if it has its special meaning (of special,
   whitespace or operator character), or OPENING_BACKQUOTE, CLOSING_BACKQUOTE,
   EOF.
   Characters pushed back with phase2_ungetc are returned first, unchanged.
   It's the caller's responsibility to update the state.  */
static int
phase2_getc ()
{
  int c;

  /* Serve pushed-back characters first.  phase2_ungetc decremented
     line_number for a pushed-back newline, so count it again here.  */
  if (phase2_pushback_length)
    {
      c = phase2_pushback[--phase2_pushback_length];
      if (c == '\n')
        ++line_number;
      return c;
    }

  c = phase1_getc ();
  if (c == EOF)
    return c;
  /* A single-quote is an ordinary character inside double-quotes and
     inside a single-quoted region that is terminated by something else.  */
  if (c == '\'')
    return ((open_doublequote
             || (open_singlequote && open_singlequote_terminator != c))
            ? QUOTED (c)
            : c);
  if (open_singlequote)
    {
      /* Inside single-quotes only the terminator is special.  */
      if (c == open_singlequote_terminator)
        return c;
    }
  else
    {
      if (c == '"' || c == '$')
        return c;
      if (c == '`')
        return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
    }
  if (c == '\\')
    {
      /* Number of debackslashification passes that are active at the
         current point.  */
      unsigned int debackslashify =
        nested_backquotes + (open_singlequote ? 0 : 1);
      /* Normal number of backslashes that yield a single backslash in the
         final output.
         NOTE(review): nothing visible here bounds debackslashify; if it
         reaches the number of bits in 'unsigned int', this shift is
         undefined behaviour.  */
      unsigned int expected_count =
        (unsigned int) 1 << debackslashify;
      /* Number of backslashes found.  */
      unsigned int count;

      /* Consume further backslashes, up to expected_count in total.  When
         the loop stops early, c is the first non-backslash character.  */
      for (count = 1; count < expected_count; count++)
        {
          c = phase1_getc ();
          if (c != '\\')
            break;
        }
      if (count == expected_count)
        return '\\';

      /* The count of backslashes is > 0 and < expected_count, therefore the
         result depends on c, the first character after the backslashes.
         Note: The formulas below don't necessarily have a logic; they were
         empirically determined such that 1. the xgettext-sh-1 test succeeds,
         2. the behaviour for count == 0 would correspond to the one without
         any backslash.  */
      if (c == '\'')
        {
          if (!open_singlequote && count > (expected_count >> 1))
            {
              phase1_ungetc (c);
              return '\\';
            }
          else
            return ((open_doublequote
                     || (open_singlequote
                         ? open_singlequote_terminator != c
                         : count == (expected_count >> 1)))
                    ? QUOTED (c)
                    : c);
        }
      else if (c == '"')
        {
          /* Each debackslashification pass converts \\ to \ and \" to ";
             passes corresponding to `...` drop a lone " whereas passes
             corresponding to "`...`" leave it alone.  Therefore, the
             minimum number of backslashes needed to get one double-quote
             in the end is  open_doublequotes_mask + 1.  */
          if (open_singlequote)
            {
              if (count > open_doublequotes_mask)
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else
                return (open_singlequote_terminator != c ? QUOTED (c) : c);
            }
          else
            {
              if (count > open_doublequotes_mask)
                return QUOTED (c);
              else
                /* Some of the count values <= open_doublequotes_mask are
                   actually invalid here, but we assume a syntactically
                   correct input file anyway.  */
                return c;
            }
        }
      else if (c == '`')
        {
          /* FIXME: This code looks fishy.  */
          if (count == expected_count - 1)
            return c;
          else
            /* Some of the count values < expected_count - 1 are
               actually invalid here, but we assume a syntactically
               correct input file anyway.  */
            if (nested_backquotes > 0 && !open_singlequote
                && count >= (expected_count >> 2))
              return OPENING_BACKQUOTE;
            else
              return CLOSING_BACKQUOTE;
        }
      else if (c == '$')
        {
          if (open_singlequote)
            return QUOTED (c);
          if (count >= (expected_count >> 1))
            return QUOTED (c);
          else
            return c;
        }
      else
        {
          /* When not followed by a quoting character or backslash or dollar,
             a backslash survives a debackslashification pass unmodified.
             Therefore each debackslashification pass performs a
               count := (count + 1) >> 1
             operation.  Therefore the minimum number of backslashes needed
             to get one backslash in the end is  (expected_count >> 1) + 1.  */
          if (open_doublequote || open_singlequote)
            {
              if (count > 0)
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else
                return QUOTED (c);
            }
          else
            {
              if (count > (expected_count >> 1))
                {
                  phase1_ungetc (c);
                  return '\\';
                }
              else if (count > 0)
                return QUOTED (c);
              else
                return c;
            }
        }
    }

  /* Any other character is a regular constituent when inside quotes.  */
  return (open_singlequote || open_doublequote ? QUOTED (c) : c);
}
     710  
     711  /* Supports 2 characters of pushback.  */
     712  static void
     713  phase2_ungetc (int c)
     714  {
     715    switch (c)
     716      {
     717      case EOF:
     718        break;
     719  
     720      case '\n':
     721        --line_number;
     722        FALLTHROUGH;
     723  
     724      default:
     725        if (phase2_pushback_length == SIZEOF (phase2_pushback))
     726          abort ();
     727        phase2_pushback[phase2_pushback_length++] = c;
     728        break;
     729      }
     730  }
     731  
     732  
     733  /* Context lookup table.  */
     734  static flag_context_list_table_ty *flag_context_list_table;
     735  
     736  
     737  /* Maximum supported nesting depth.  */
     738  #define MAX_NESTING_DEPTH 1000
     739  
     740  /* Current nesting depth.  */
     741  static int nesting_depth;
     742  
     743  
     744  /* Forward declaration of local functions.  */
     745  static enum word_type read_command_list (int looking_for,
     746                                           flag_context_ty outer_context);
     747  
     748  
     749  
     750  /* Read the next word.
     751     'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
     752     or '\0'.  */
     753  static void
     754  read_word (struct word *wp, int looking_for, flag_context_ty context)
     755  {
     756    int c;
     757    bool all_unquoted_digits;
     758    bool all_unquoted_name_characters;
     759  
     760    do
     761      {
     762        c = phase2_getc ();
     763        if (c == '#')
     764          {
     765            /* Skip a comment up to end of line.  */
     766            last_comment_line = line_number;
     767            comment_start ();
     768            for (;;)
     769              {
     770                c = phase1_getc ();
     771                if (c == EOF || c == '\n')
     772                  break;
     773                /* We skip all leading white space, but not EOLs.  */
     774                if (!(buflen == 0 && (c == ' ' || c == '\t')))
     775                  comment_add (c);
     776              }
     777            comment_line_end ();
     778          }
     779        if (c == '\n')
     780          {
     781            /* Comments assumed to be grouped with a message must immediately
     782               precede it, with no non-whitespace token on a line between
     783               both.  */
     784            if (last_non_comment_line > last_comment_line)
     785              savable_comment_reset ();
     786            wp->type = t_separator;
     787            return;
     788          }
     789      }
     790    while (is_whitespace (c));
     791  
     792    if (c == EOF)
     793      {
     794        wp->type = t_eof;
     795        return;
     796      }
     797  
     798    if (c == '<' || c == '>')
     799      {
     800        /* Recognize the redirection operators < > >| << <<- >> <> <& >&
     801           But <( and >) are handled below, not here.  */
     802        int c2 = phase2_getc ();
     803        if (c2 != '(')
     804          {
     805            if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
     806              {
     807                if (c == '<' && c2 == '<')
     808                  {
     809                    int c3 = phase2_getc ();
     810                    if (c3 != '-')
     811                      phase2_ungetc (c3);
     812                  }
     813              }
     814            else
     815              phase2_ungetc (c2);
     816            wp->type = t_redirect;
     817            return;
     818          }
     819        else
     820          phase2_ungetc (c2);
     821      }
     822  
     823    if (c == CLOSING_BACKQUOTE)
     824      {
     825        if (looking_for == CLOSING_BACKQUOTE)
     826          {
     827            saw_closing_backquote ();
     828            wp->type = t_backquote;
     829            last_non_comment_line = line_number;
     830            return;
     831          }
     832        else if (looking_for == ')')
     833          {
     834            /* The input is invalid syntax, such as `a<(`
     835               Push back the closing backquote and pretend that we have seen a
     836               closing parenthesis.  */
     837            phase2_ungetc (c);
     838            wp->type = t_paren;
     839            last_non_comment_line = line_number;
     840            return;
     841          }
     842        else
     843          /* We shouldn't be reading a CLOSING_BACKQUOTE when
     844             looking_for == '\0'.  */
     845          abort ();
     846      }
     847  
     848    if (looking_for == ')' && c == ')')
     849      {
     850        wp->type = t_paren;
     851        last_non_comment_line = line_number;
     852        return;
     853      }
     854  
     855    if (is_operator_start (c))
     856      {
     857        wp->type = (c == ';' ? t_separator : t_other);
     858        return;
     859      }
     860  
     861    wp->type = t_string;
     862    wp->token = XMALLOC (struct token);
     863    init_token (wp->token);
     864    wp->line_number_at_start = line_number;
     865    /* True while all characters in the token seen so far are digits.  */
     866    all_unquoted_digits = true;
     867    /* True while all characters in the token seen so far form a "name":
     868       all characters are unquoted underscores, digits, or alphabetics from the
     869       portable character set, and the first character is not a digit.  Cf.
     870       <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_235>
     871     */
     872    all_unquoted_name_characters = true;
     873  
     874    for (;; c = phase2_getc ())
     875      {
     876        if (c == EOF)
     877          break;
     878  
     879        if (all_unquoted_digits && (c == '<' || c == '>'))
     880          {
     881            /* Recognize the redirection operators < > >| << <<- >> <> <& >&
     882               prefixed with a nonempty sequence of unquoted digits.  */
     883            int c2 = phase2_getc ();
     884            if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
     885              {
     886                if (c == '<' && c2 == '<')
     887                  {
     888                    int c3 = phase2_getc ();
     889                    if (c3 != '-')
     890                      phase2_ungetc (c3);
     891                  }
     892              }
     893            else
     894              phase2_ungetc (c2);
     895  
     896            wp->type = t_redirect;
     897            free_token (wp->token);
     898            free (wp->token);
     899  
     900            last_non_comment_line = line_number;
     901  
     902            return;
     903          }
     904  
     905        all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');
     906  
     907        if (all_unquoted_name_characters && wp->token->charcount > 0 && c == '=')
     908          {
     909            wp->type = t_assignment;
     910            continue;
     911          }
     912  
     913        all_unquoted_name_characters =
     914           all_unquoted_name_characters
     915           && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'
     916               || (wp->token->charcount > 0 && c >= '0' && c <= '9'));
     917  
     918        if (c == '$')
     919          {
     920            int c2;
     921  
     922            /* An unquoted dollar indicates we are not inside '...'.  */
     923            if (open_singlequote)
     924              abort ();
     925            /* After reading a dollar, we know that there is no pushed back
     926               character from an earlier lookahead.  */
     927            if (phase2_pushback_length > 0)
     928              abort ();
     929            /* Therefore we can use phase1 without interfering with phase2.
     930               We need to recognize $( outside and inside double-quotes.
     931               It would be incorrect to do
     932                  c2 = phase2_getc ();
     933                  if (c2 == '(' || c2 == QUOTED ('('))
     934               because that would also trigger for $\(.  */
     935            c2 = phase1_getc ();
     936            if (c2 == '(')
     937              {
     938                bool saved_open_doublequote;
     939                int c3;
     940  
     941                phase1_ungetc (c2);
     942  
     943                /* The entire inner command or arithmetic expression is read
     944                   ignoring possible surrounding double-quotes.  */
     945                saved_open_doublequote = open_doublequote;
     946                open_doublequote = false;
     947  
     948                c2 = phase2_getc ();
     949                if (c2 != '(')
     950                  abort ();
     951  
     952                c3 = phase2_getc ();
     953                if (c3 == '(')
     954                  {
     955                    /* Arithmetic expression (Bash syntax).  Skip until the
     956                       matching closing parenthesis.  */
     957                    unsigned int depth = 2;
     958  
     959                    do
     960                      {
     961                        c = phase2_getc ();
     962                        if (c == '(')
     963                          depth++;
     964                        else if (c == ')')
     965                          if (--depth == 0)
     966                            break;
     967                      }
     968                    while (c != EOF);
     969                  }
     970                else
     971                  {
     972                    /* Command substitution (Bash syntax).  */
     973                    phase2_ungetc (c3);
     974                    ++nesting_depth;
     975                    read_command_list (')', context);
     976                    nesting_depth--;
     977                  }
     978  
     979                open_doublequote = saved_open_doublequote;
     980              }
     981            else
     982              {
     983                phase1_ungetc (c2);
     984                c2 = phase2_getc ();
     985  
     986                if (c2 == '\'' && !open_singlequote)
     987                  {
     988                    /* Bash builtin for string with ANSI-C escape sequences.  */
     989                    for (;;)
     990                      {
     991                        /* We have to use phase1 throughout this loop,
     992                           because phase2 does debackslashification,
     993                           which is undesirable when parsing ANSI-C
     994                           escape sequences.  */
     995                        c = phase1_getc ();
     996                        if (c == EOF)
     997                          break;
     998                        if (c == '\'')
     999                          break;
    1000                        if (c == '\\')
    1001                          {
    1002                            c = phase1_getc ();
    1003                            switch (c)
    1004                              {
    1005                              default:
    1006                                phase1_ungetc (c);
    1007                                c = '\\';
    1008                                break;
    1009  
    1010                              case '\\':
    1011                                break;
    1012                              case '\'':
    1013                                break;
    1014                              case '"':
    1015                                break;
    1016  
    1017                              case 'a':
    1018                                c = '\a';
    1019                                break;
    1020                              case 'b':
    1021                                c = '\b';
    1022                                break;
    1023                              case 'e':
    1024                              case 'E':
    1025                                c = 0x1b; /* ESC */
    1026                                break;
    1027                              case 'f':
    1028                                c = '\f';
    1029                                break;
    1030                              case 'n':
    1031                                c = '\n';
    1032                                break;
    1033                              case 'r':
    1034                                c = '\r';
    1035                                break;
    1036                              case 't':
    1037                                c = '\t';
    1038                                break;
    1039                              case 'v':
    1040                                c = '\v';
    1041                                break;
    1042  
    1043                              case 'x':
    1044                                c = phase1_getc ();
    1045                                if ((c >= '0' && c <= '9')
    1046                                    || (c >= 'A' && c <= 'F')
    1047                                    || (c >= 'a' && c <= 'f'))
    1048                                  {
    1049                                    int n;
    1050  
    1051                                    if (c >= '0' && c <= '9')
    1052                                      n = c - '0';
    1053                                    else if (c >= 'A' && c <= 'F')
    1054                                      n = 10 + c - 'A';
    1055                                    else if (c >= 'a' && c <= 'f')
    1056                                      n = 10 + c - 'a';
    1057                                    else
    1058                                      abort ();
    1059  
    1060                                    c = phase1_getc ();
    1061                                    if ((c >= '0' && c <= '9')
    1062                                        || (c >= 'A' && c <= 'F')
    1063                                        || (c >= 'a' && c <= 'f'))
    1064                                      {
    1065                                        if (c >= '0' && c <= '9')
    1066                                          n = n * 16 + c - '0';
    1067                                        else if (c >= 'A' && c <= 'F')
    1068                                          n = n * 16 + 10 + c - 'A';
    1069                                        else if (c >= 'a' && c <= 'f')
    1070                                          n = n * 16 + 10 + c - 'a';
    1071                                        else
    1072                                          abort ();
    1073                                      }
    1074                                    else
    1075                                      phase1_ungetc (c);
    1076  
    1077                                    c = n;
    1078                                  }
    1079                                else
    1080                                  {
    1081                                    phase1_ungetc (c);
    1082                                    phase1_ungetc ('x');
    1083                                    c = '\\';
    1084                                  }
    1085                                break;
    1086  
    1087                              case '0': case '1': case '2': case '3':
    1088                              case '4': case '5': case '6': case '7':
    1089                                {
    1090                                  int n = c - '0';
    1091  
    1092                                  c = phase1_getc ();
    1093                                  if (c >= '0' && c <= '7')
    1094                                    {
    1095                                      n = n * 8 + c - '0';
    1096  
    1097                                      c = phase1_getc ();
    1098                                      if (c >= '0' && c <= '7')
    1099                                        n = n * 8 + c - '0';
    1100                                      else
    1101                                        phase1_ungetc (c);
    1102                                    }
    1103                                  else
    1104                                    phase1_ungetc (c);
    1105  
    1106                                  c = n;
    1107                                }
    1108                                break;
    1109                              }
    1110                          }
    1111                        if (wp->type == t_string)
    1112                          {
    1113                            grow_token (wp->token);
    1114                            wp->token->chars[wp->token->charcount++] =
    1115                              (unsigned char) c;
    1116                          }
    1117                      }
    1118                    /* The result is a literal string.  Don't change wp->type.  */
    1119                    continue;
    1120                  }
    1121                else if (c2 == '"' && !open_doublequote)
    1122                  {
    1123                    /* Bash builtin for internationalized string.  */
    1124                    lex_pos_ty pos;
    1125                    struct token string;
    1126  
    1127                    saw_opening_singlequote ();
    1128                    open_singlequote_terminator = '"';
    1129                    pos.file_name = logical_file_name;
    1130                    pos.line_number = line_number;
    1131                    init_token (&string);
    1132                    for (;;)
    1133                      {
    1134                        c = phase2_getc ();
    1135                        if (c == EOF)
    1136                          break;
    1137                        if (c == '"')
    1138                          {
    1139                            saw_closing_singlequote ();
    1140                            break;
    1141                          }
    1142                        grow_token (&string);
    1143                        string.chars[string.charcount++] = (unsigned char) c;
    1144                      }
    1145                    remember_a_message (mlp, NULL, string_of_token (&string),
    1146                                        false, false, context, &pos,
    1147                                        NULL, savable_comment, false);
    1148                    free_token (&string);
    1149  
    1150                    error_with_progname = false;
    1151                    error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
    1152                           pos.file_name, (unsigned long) pos.line_number);
    1153                    error_with_progname = true;
    1154  
    1155                    /* The result at runtime is not constant. Therefore we
    1156                       change wp->type.  */
    1157                  }
    1158                else
    1159                  phase2_ungetc (c2);
    1160              }
    1161            wp->type = t_other;
    1162            continue;
    1163          }
    1164  
    1165        if (c == '\'')
    1166          {
    1167            if (!open_singlequote)
    1168              {
    1169                /* Handle an opening single quote.  */
    1170                saw_opening_singlequote ();
    1171              }
    1172            else
    1173              {
    1174                /* Handle a closing single quote.  */
    1175                saw_closing_singlequote ();
    1176              }
    1177            continue;
    1178          }
    1179  
    1180        if (c == '"')
    1181          {
    1182            if (open_singlequote && open_singlequote_terminator == '"')
    1183              {
    1184                /* Handle a closing i18n quote.  */
    1185                saw_closing_singlequote ();
    1186              }
    1187            else if (!open_doublequote)
    1188              {
    1189                /* Handle an opening double quote.  */
    1190                saw_opening_doublequote ();
    1191              }
    1192            else
    1193              {
    1194                /* Handle a closing double quote.  */
    1195                saw_closing_doublequote ();
    1196              }
    1197            continue;
    1198          }
    1199  
    1200        if (c == OPENING_BACKQUOTE)
    1201          {
    1202            /* Handle an opening backquote.  */
    1203            saw_opening_backquote ();
    1204  
    1205            ++nesting_depth;
    1206            read_command_list (CLOSING_BACKQUOTE, context);
    1207            nesting_depth--;
    1208  
    1209            wp->type = t_other;
    1210            continue;
    1211          }
    1212        if (c == CLOSING_BACKQUOTE)
    1213          break;
    1214  
    1215        if (c == '<' || c == '>')
    1216          {
    1217            int c2;
    1218  
    1219            /* An unquoted c indicates we are not inside '...' nor "...".  */
    1220            if (open_singlequote || open_doublequote)
    1221              abort ();
    1222  
    1223            c2 = phase2_getc ();
    1224            if (c2 == '(')
    1225              {
    1226                /* Process substitution (Bash syntax).  */
    1227                ++nesting_depth;
    1228                read_command_list (')', context);
    1229                nesting_depth--;
    1230  
    1231                wp->type = t_other;
    1232                continue;
    1233              }
    1234            else
    1235              phase2_ungetc (c2);
    1236          }
    1237  
    1238        if (!open_singlequote && !open_doublequote
    1239            && (is_whitespace (c) || is_operator_start (c)))
    1240          break;
    1241  
    1242        if (wp->type == t_string)
    1243          {
    1244            grow_token (wp->token);
    1245            wp->token->chars[wp->token->charcount++] = (unsigned char) c;
    1246          }
    1247      }
    1248  
    1249    phase2_ungetc (c);
    1250  
    1251    if (wp->type != t_string)
    1252      {
    1253        free_token (wp->token);
    1254        free (wp->token);
    1255      }
    1256    last_non_comment_line = line_number;
    1257  }
    1258  
    1259  
    1260  /* Read the next command.
    1261     'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
    1262     or '\0'.
    1263     Returns the type of the word that terminated the command.  */
    1264  static enum word_type
    1265  read_command (int looking_for, flag_context_ty outer_context)
    1266  {
    1267    /* Read the words that make up the command.
    1268       Here we completely ignore field splitting at whitespace and wildcard
    1269       expansions; i.e. we assume that the source is written in such a way that
    1270       every word in the program determines exactly one word in the resulting
    1271       command.
    1272       But we do not require that the 'gettext'/'ngettext' command is the
    1273       first in the command; this is because 1. we want to allow for prefixes
    1274       like "$verbose" that may expand to nothing, and 2. it's a big effort
    1275       to know where a command starts in a $(for ...) or $(case ...) compound
    1276       command.  */
    1277    int arg = 0;                  /* Current argument number.  */
    1278    bool arg_of_redirect = false; /* True right after a redirection operator.  */
    1279    bool must_expand_arg_strings = false; /* True if need to expand escape
    1280                                             sequences in arguments.  */
    1281    flag_context_list_iterator_ty context_iter;
    1282    const struct callshapes *shapes = NULL;
    1283    struct arglist_parser *argparser = NULL;
    1284  
    1285    for (;;)
    1286      {
    1287        struct word inner;
    1288        flag_context_ty inner_context;
    1289  
    1290        if (arg == 0)
    1291          inner_context = null_context;
    1292        else
    1293          inner_context =
    1294            inherited_context (outer_context,
    1295                               flag_context_list_iterator_advance (
    1296                                 &context_iter));
    1297  
    1298        read_word (&inner, looking_for, inner_context);
    1299  
    1300        /* Recognize end of command.  */
    1301        if (inner.type == t_separator
    1302            || inner.type == t_backquote || inner.type == t_paren
    1303            || inner.type == t_eof)
    1304          {
    1305            if (argparser != NULL)
    1306              arglist_parser_done (argparser, arg);
    1307            return inner.type;
    1308          }
    1309  
    1310        if (extract_all)
    1311          {
    1312            if (inner.type == t_string)
    1313              {
    1314                lex_pos_ty pos;
    1315  
    1316                pos.file_name = logical_file_name;
    1317                pos.line_number = inner.line_number_at_start;
    1318                remember_a_message (mlp, NULL, string_of_word (&inner), false,
    1319                                    false, inner_context, &pos,
    1320                                    NULL, savable_comment, false);
    1321              }
    1322          }
    1323  
    1324        if (arg_of_redirect)
    1325          {
    1326            /* Ignore arguments of redirection operators.  */
    1327            arg_of_redirect = false;
    1328          }
    1329        else if (inner.type == t_redirect)
    1330          {
    1331            /* Ignore this word and the following one.  */
    1332            arg_of_redirect = true;
    1333          }
    1334        else
    1335          {
    1336            bool matters_for_argparser = true;
    1337  
    1338            if (argparser == NULL)
    1339              {
    1340                /* This is the function position.  */
    1341                arg = 0;
    1342                if (inner.type == t_assignment)
    1343                  {
    1344                    /* An assignment just sets an environment variable.
    1345                       Ignore it.  */
    1346                    /* Don't increment arg in this round.  */
    1347                    matters_for_argparser = false;
    1348                  }
    1349                else if (inner.type == t_string)
    1350                  {
    1351                    char *function_name = string_of_word (&inner);
    1352  
    1353                    if (strcmp (function_name, "env") == 0)
    1354                      {
    1355                        /* The 'env' command just introduces more assignments.
    1356                           Ignore it.  */
    1357                        /* Don't increment arg in this round.  */
    1358                        matters_for_argparser = false;
    1359                      }
    1360                    else
    1361                      {
    1362                        void *keyword_value;
    1363  
    1364                        if (hash_find_entry (&keywords,
    1365                                             function_name,
    1366                                             strlen (function_name),
    1367                                             &keyword_value)
    1368                            == 0)
    1369                          shapes = (const struct callshapes *) keyword_value;
    1370  
    1371                        argparser = arglist_parser_alloc (mlp, shapes);
    1372  
    1373                        context_iter =
    1374                          flag_context_list_iterator (
    1375                            flag_context_list_table_lookup (
    1376                              flag_context_list_table,
    1377                              function_name, strlen (function_name)));
    1378                      }
    1379  
    1380                    free (function_name);
    1381                  }
    1382                else
    1383                  context_iter = null_context_list_iterator;
    1384              }
    1385            else
    1386              {
    1387                /* These are the argument positions.  */
    1388                if (inner.type == t_string)
    1389                  {
    1390                    bool accepts_context =
    1391                      ((argparser->keyword_len == 7
    1392                        && memcmp (argparser->keyword, "gettext", 7) == 0)
    1393                       || (argparser->keyword_len == 8
    1394                           && memcmp (argparser->keyword, "ngettext", 8) == 0));
    1395                    bool accepts_expand =
    1396                      ((argparser->keyword_len == 7
    1397                        && memcmp (argparser->keyword, "gettext", 7) == 0)
    1398                       || (argparser->keyword_len == 8
    1399                           && memcmp (argparser->keyword, "ngettext", 8) == 0));
    1400                    if (accepts_context && argparser->next_is_msgctxt)
    1401                      {
    1402                        char *s = string_of_word (&inner);
    1403                        mixed_string_ty *ms =
    1404                          mixed_string_alloc_simple (s, lc_string,
    1405                                                     logical_file_name,
    1406                                                     inner.line_number_at_start);
    1407                        free (s);
    1408                        argparser->next_is_msgctxt = false;
    1409                        arglist_parser_remember_msgctxt (argparser, ms,
    1410                                                         inner_context,
    1411                                                         logical_file_name,
    1412                                                         inner.line_number_at_start);
    1413                        matters_for_argparser = false;
    1414                      }
    1415                    else if (accepts_context
    1416                             && ((inner.token->charcount == 2
    1417                                  && memcmp (inner.token->chars, "-c", 2) == 0)
    1418                                 || (inner.token->charcount == 9
    1419                                     && memcmp (inner.token->chars, "--context", 9) == 0)))
    1420                      {
    1421                        argparser->next_is_msgctxt = true;
    1422                        matters_for_argparser = false;
    1423                      }
    1424                    else if (accepts_context
    1425                             && (inner.token->charcount >= 10
    1426                                 && memcmp (inner.token->chars, "--context=", 10) == 0))
    1427                      {
    1428                        char *s = substring_of_word (&inner, 10);
    1429                        mixed_string_ty *ms =
    1430                          mixed_string_alloc_simple (s, lc_string,
    1431                                                     logical_file_name,
    1432                                                     inner.line_number_at_start);
    1433                        free (s);
    1434                        argparser->next_is_msgctxt = false;
    1435                        arglist_parser_remember_msgctxt (argparser, ms,
    1436                                                         inner_context,
    1437                                                         logical_file_name,
    1438                                                         inner.line_number_at_start);
    1439                        matters_for_argparser = false;
    1440                      }
    1441                    else if (accepts_expand
    1442                             && inner.token->charcount == 2
    1443                             && memcmp (inner.token->chars, "-e", 2) == 0)
    1444                      {
    1445                        must_expand_arg_strings = true;
    1446                        matters_for_argparser = false;
    1447                      }
    1448                    else
    1449                      {
    1450                        char *s = string_of_word (&inner);
    1451                        mixed_string_ty *ms;
    1452  
    1453                        /* When '-e' was specified, expand escape sequences in s.  */
    1454                        if (accepts_expand && must_expand_arg_strings)
    1455                          {
    1456                            bool expands_backslash_c =
    1457                              (argparser->keyword_len == 7
    1458                               && memcmp (argparser->keyword, "gettext", 7) == 0);
    1459                            bool backslash_c = false;
    1460                            char *expanded =
    1461                              (char *)
    1462                              expand_escapes (s, expands_backslash_c ? &backslash_c : NULL);
    1463                            /* We can ignore the value of expands_backslash_c, because
    1464                               here we don't support the gettext '-s' option.  */
    1465                            if (expanded != s)
    1466                              free (s);
    1467                            s = expanded;
    1468                          }
    1469  
    1470                        ms = mixed_string_alloc_simple (s, lc_string,
    1471                                                        logical_file_name,
    1472                                                        inner.line_number_at_start);
    1473                        free (s);
    1474                        arglist_parser_remember (argparser, arg, ms,
    1475                                                 inner_context,
    1476                                                 logical_file_name,
    1477                                                 inner.line_number_at_start,
    1478                                                 savable_comment, false);
    1479                      }
    1480                  }
    1481  
    1482                if (matters_for_argparser)
    1483                  if (arglist_parser_decidedp (argparser, arg))
    1484                    {
    1485                      /* Stop looking for arguments of the last function_name.  */
    1486                      /* FIXME: What about context_iter?  */
    1487                      arglist_parser_done (argparser, arg);
    1488                      shapes = NULL;
    1489                      argparser = NULL;
    1490                    }
    1491              }
    1492  
    1493            if (matters_for_argparser)
    1494              arg++;
    1495          }
    1496  
    1497        free_word (&inner);
    1498      }
    1499  }
    1500  
    1501  
    1502  /* Read a list of commands.
    1503     'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
    1504     or '\0'.
    1505     Returns the type of the word that terminated the command list.  */
    1506  static enum word_type
    1507  read_command_list (int looking_for, flag_context_ty outer_context)
    1508  {
    1509    if (nesting_depth > MAX_NESTING_DEPTH)
    1510      {
    1511        error_with_progname = false;
    1512        error (EXIT_FAILURE, 0, _("%s:%d: error: too deeply nested command list"),
    1513               logical_file_name, line_number);
    1514      }
    1515    for (;;)
    1516      {
    1517        enum word_type terminator;
    1518  
    1519        terminator = read_command (looking_for, outer_context);
    1520        if (terminator != t_separator)
    1521          return terminator;
    1522      }
    1523  }
    1524  
    1525  
    1526  void
    1527  extract_sh (FILE *f,
    1528              const char *real_filename, const char *logical_filename,
    1529              flag_context_list_table_ty *flag_table,
    1530              msgdomain_list_ty *mdlp)
    1531  {
    1532    mlp = mdlp->item[0]->messages;
    1533  
    1534    fp = f;
    1535    real_file_name = real_filename;
    1536    logical_file_name = xstrdup (logical_filename);
    1537    line_number = 1;
    1538  
    1539    phase1_pushback_length = 0;
    1540  
    1541    last_comment_line = -1;
    1542    last_non_comment_line = -1;
    1543  
    1544    nested_backquotes = 0;
    1545    open_doublequotes_mask = 0;
    1546    open_doublequote = false;
    1547    open_singlequote = false;
    1548  
    1549    phase2_pushback_length = 0;
    1550  
    1551    flag_context_list_table = flag_table;
    1552    nesting_depth = 0;
    1553  
    1554    init_keywords ();
    1555  
    1556    /* Eat tokens until eof is seen.  */
    1557    read_command_list ('\0', null_context);
    1558  
    1559    fp = NULL;
    1560    real_file_name = NULL;
    1561    logical_file_name = NULL;
    1562    line_number = 0;
    1563  }