1  /* xgettext awk backend.
       2     Copyright (C) 2002-2003, 2005-2009, 2018-2023 Free Software Foundation, Inc.
       3  
       4     This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
       5  
       6     This program is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3 of the License, or
       9     (at your option) any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifdef HAVE_CONFIG_H
      20  # include "config.h"
      21  #endif
      22  
      23  /* Specification.  */
      24  #include "x-awk.h"
      25  
      26  #include <errno.h>
      27  #include <stdbool.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  
      32  #include "attribute.h"
      33  #include "message.h"
      34  #include "xgettext.h"
      35  #include "xg-pos.h"
      36  #include "xg-mixed-string.h"
      37  #include "xg-arglist-context.h"
      38  #include "xg-arglist-callshape.h"
      39  #include "xg-arglist-parser.h"
      40  #include "xg-message.h"
      41  #include "error.h"
      42  #include "error-progname.h"
      43  #include "xalloc.h"
      44  #include "gettext.h"
      45  
      46  #define _(s) gettext(s)
      47  
      48  
      49  /* The awk syntax is defined in the gawk manual page and documentation.
      50     See also gawk/awkgram.y.  */
      51  
      52  
      53  /* ====================== Keyword set customization.  ====================== */
      54  
/* If true extract all strings.  */
static bool extract_all = false;

/* Table of the keywords to look for, filled through x_awk_keyword.  The
   table is initialized lazily, on the first keyword that is registered.  */
static hash_table keywords;
/* True as long as the built-in default keywords still have to be added by
   init_keywords.  Cleared by x_awk_keyword (NULL) and by init_keywords
   itself once the defaults have been registered.  */
static bool default_keywords = true;
      60  
      61  
/* Turn on the "extract all" mode: plain string tokens are then remembered
   as messages directly instead of being handed to the argument list
   parser.  */
void
x_awk_extract_all ()
{
  extract_all = true;
}
      67  
      68  
      69  void
      70  x_awk_keyword (const char *name)
      71  {
      72    if (name == NULL)
      73      default_keywords = false;
      74    else
      75      {
      76        const char *end;
      77        struct callshape shape;
      78        const char *colon;
      79  
      80        if (keywords.table == NULL)
      81          hash_init (&keywords, 100);
      82  
      83        split_keywordspec (name, &end, &shape);
      84  
      85        /* The characters between name and end should form a valid C identifier.
      86           A colon means an invalid parse in split_keywordspec().  */
      87        colon = strchr (name, ':');
      88        if (colon == NULL || colon >= end)
      89          insert_keyword_callshape (&keywords, name, end - name, &shape);
      90      }
      91  }
      92  
      93  /* Finish initializing the keywords hash table.
      94     Called after argument processing, before each file is processed.  */
      95  static void
      96  init_keywords ()
      97  {
      98    if (default_keywords)
      99      {
     100        /* When adding new keywords here, also update the documentation in
     101           xgettext.texi!  */
     102        x_awk_keyword ("dcgettext");
     103        x_awk_keyword ("dcngettext:1,2");
     104        default_keywords = false;
     105      }
     106  }
     107  
     108  void
     109  init_flag_table_awk ()
     110  {
     111    xgettext_record_flag ("dcgettext:1:pass-awk-format");
     112    xgettext_record_flag ("dcngettext:1:pass-awk-format");
     113    xgettext_record_flag ("dcngettext:2:pass-awk-format");
     114    xgettext_record_flag ("printf:1:awk-format");
     115  }
     116  
     117  
     118  /* ======================== Reading of characters.  ======================== */
     119  
/* The input file stream.  */
static FILE *fp;

/* These are for tracking whether comments count as immediately before
   keyword.
   last_comment_line is set by phase2_getc to the line of the most recent
   comment; last_non_comment_line is set by x_awk_lex to the line of the
   most recent token.  At a newline, x_awk_lex drops the saved comments if
   a token was seen on a later line than the last comment.  */
static int last_comment_line;
static int last_non_comment_line;
     127  
     128  
     129  /* 1. line_number handling.  */
     130  
     131  static int
     132  phase1_getc ()
     133  {
     134    int c = getc (fp);
     135  
     136    if (c == EOF)
     137      {
     138        if (ferror (fp))
     139          error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
     140                 real_file_name);
     141        return EOF;
     142      }
     143  
     144    if (c == '\n')
     145      line_number++;
     146  
     147    return c;
     148  }
     149  
     150  /* Supports only one pushback character.  */
     151  static void
     152  phase1_ungetc (int c)
     153  {
     154    if (c != EOF)
     155      {
     156        if (c == '\n')
     157          --line_number;
     158  
     159        ungetc (c, fp);
     160      }
     161  }
     162  
     163  
     164  /* 2. Replace each comment that is not inside a string literal or regular
     165     expression with a newline character.  We need to remember the comment
     166     for later, because it may be attached to a keyword string.  */
     167  
     168  static int
     169  phase2_getc ()
     170  {
     171    static char *buffer;
     172    static size_t bufmax;
     173    size_t buflen;
     174    int lineno;
     175    int c;
     176  
     177    c = phase1_getc ();
     178    if (c == '#')
     179      {
     180        buflen = 0;
     181        lineno = line_number;
     182        for (;;)
     183          {
     184            c = phase1_getc ();
     185            if (c == '\n' || c == EOF)
     186              break;
     187            /* We skip all leading white space, but not EOLs.  */
     188            if (!(buflen == 0 && (c == ' ' || c == '\t')))
     189              {
     190                if (buflen >= bufmax)
     191                  {
     192                    bufmax = 2 * bufmax + 10;
     193                    buffer = xrealloc (buffer, bufmax);
     194                  }
     195                buffer[buflen++] = c;
     196              }
     197          }
     198        if (buflen >= bufmax)
     199          {
     200            bufmax = 2 * bufmax + 10;
     201            buffer = xrealloc (buffer, bufmax);
     202          }
     203        buffer[buflen] = '\0';
     204        savable_comment_add (buffer);
     205        last_comment_line = lineno;
     206      }
     207    return c;
     208  }
     209  
     210  /* Supports only one pushback character.  */
     211  static void
     212  phase2_ungetc (int c)
     213  {
     214    if (c != EOF)
     215      phase1_ungetc (c);
     216  }
     217  
     218  
     219  /* ========================== Reading of tokens.  ========================== */
     220  
     221  
/* The kinds of tokens that x_awk_lex distinguishes.  */
enum token_type_ty
{
  token_type_eof,
  token_type_lparen,            /* ( */
  token_type_rparen,            /* ) */
  token_type_comma,             /* , */
  token_type_string,            /* "abc" */
  token_type_i18nstring,        /* _"abc" */
  token_type_symbol,            /* symbol, number */
  token_type_semicolon,         /* ; */
  token_type_other              /* regexp, misc. operator */
};
typedef enum token_type_ty token_type_ty;

/* A token as filled in by x_awk_lex.  */
typedef struct token_ty token_ty;
struct token_ty
{
  token_type_ty type;
  /* Copy of the token text made with xstrdup; set only for the token
     types listed here.  */
  char *string;         /* for token_type_{symbol,string,i18nstring} */
  /* Value of the global line_number just before the first character of
     the token was read.  */
  int line_number;
};
     243  
     244  
     245  /* 7. Replace escape sequences within character strings with their
     246     single character equivalents.  */
     247  
     248  #define P7_QUOTES (1000 + '"')
     249  
     250  static int
     251  phase7_getc ()
     252  {
     253    int c;
     254  
     255    for (;;)
     256      {
     257        /* Use phase 1, because phase 2 elides comments.  */
     258        c = phase1_getc ();
     259  
     260        if (c == EOF || c == '\n')
     261          break;
     262        if (c == '"')
     263          return P7_QUOTES;
     264        if (c != '\\')
     265          return c;
     266        c = phase1_getc ();
     267        if (c == EOF)
     268          break;
     269        if (c != '\n')
     270          switch (c)
     271            {
     272            case 'a':
     273              return '\a';
     274            case 'b':
     275              return '\b';
     276            case 'f':
     277              return '\f';
     278            case 'n':
     279              return '\n';
     280            case 'r':
     281              return '\r';
     282            case 't':
     283              return '\t';
     284            case 'v':
     285              return '\v';
     286            case '0': case '1': case '2': case '3': case '4':
     287            case '5': case '6': case '7':
     288              {
     289                int n = c - '0';
     290  
     291                c = phase1_getc ();
     292                if (c != EOF)
     293                  {
     294                    if (c >= '0' && c <= '7')
     295                      {
     296                        n = (n << 3) + (c - '0');
     297                        c = phase1_getc ();
     298                        if (c != EOF)
     299                          {
     300                            if (c >= '0' && c <= '7')
     301                              n = (n << 3) + (c - '0');
     302                            else
     303                              phase1_ungetc (c);
     304                          }
     305                      }
     306                    else
     307                      phase1_ungetc (c);
     308                  }
     309                return (unsigned char) n;
     310              }
     311            case 'x':
     312              {
     313                int n = 0;
     314  
     315                for (;;)
     316                  {
     317                    c = phase1_getc ();
     318                    if (c == EOF)
     319                      break;
     320                    else if (c >= '0' && c <= '9')
     321                      n = (n << 4) + (c - '0');
     322                    else if (c >= 'A' && c <= 'F')
     323                      n = (n << 4) + (c - 'A' + 10);
     324                    else if (c >= 'a' && c <= 'f')
     325                      n = (n << 4) + (c - 'a' + 10);
     326                    else
     327                      {
     328                        phase1_ungetc (c);
     329                        break;
     330                      }
     331                  }
     332                return (unsigned char) n;
     333              }
     334            default:
     335              return c;
     336            }
     337      }
     338  
     339    phase1_ungetc (c);
     340    error_with_progname = false;
     341    error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
     342           line_number);
     343    error_with_progname = true;
     344    return P7_QUOTES;
     345  }
     346  
     347  
     348  /* Free the memory pointed to by a 'struct token_ty'.  */
     349  static inline void
     350  free_token (token_ty *tp)
     351  {
     352    switch (tp->type)
     353      {
     354      case token_type_string:
     355      case token_type_i18nstring:
     356      case token_type_symbol:
     357        free (tp->string);
     358        break;
     359      default:
     360        break;
     361      }
     362  }
     363  
     364  
/* Combine characters into tokens.  Discard whitespace.  */

/* There is an ambiguity about '/': It can start a division operator ('/' or
   '/=') or it can start a regular expression.  The distinction is important
   because inside regular expressions, '#' and '"' lose their special
   meanings.
   If you look at the awk grammar, you see that the operator is only allowed
   right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
   can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
   So we prefer the division operator interpretation only right after
   symbol, string, number, ')', ']', with whitespace but no newline allowed
   in between.  */
static bool prefer_division_over_regexp;
     377  
/* Read the next token from the input and store it in *TP.
   Blanks, newlines, comments (already reduced to newlines by phase 2) and
   backslashes outside of strings and regular expressions are skipped.
   TP->type is always set.  TP->line_number is the value of line_number
   just before the first character of the token was read.  TP->string is
   set, to a copy made with xstrdup, only for symbol, string and i18nstring
   tokens.
   prefer_division_over_regexp is updated according to the token returned,
   so that the next call can decide how to interpret a '/'.  */
static void
x_awk_lex (token_ty *tp)
{
  /* Scratch buffer for the text of the current symbol or string.  It is
     kept across calls and grown on demand.  */
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      /* First handle everything that does not start a token.  */
      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          /* Forget the saved comments if a token was seen on a later line
             than the last comment.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* Newline is not allowed inside expressions.  It usually
             introduces a fresh statement.
             FIXME: A newline after any of ',' '{' '?' ':' '||' '&&' 'do'
             'else' does *not* introduce a fresh statement.  */
          prefer_division_over_regexp = false;
          FALLTHROUGH;
        case '\t':
        case ' ':
          /* Ignore whitespace and comments.  */
          continue;

        case '\\':
          /* Backslash ought to be immediately followed by a newline.  */
          continue;
        }

      /* From here on, C is the first character of a token.  */
      last_non_comment_line = tp->line_number;

      switch (c)
        {
        case '.':
          {
            /* Peek at the next character: a '.' starts a number only if a
               digit follows.  */
            int c2 = phase2_getc ();
            phase2_ungetc (c2);
            if (!(c2 >= '0' && c2 <= '9'))
              {

                tp->type = token_type_other;
                prefer_division_over_regexp = false;
                return;
              }
          }
          FALLTHROUGH;
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Symbol, or part of a number.  */
          bufpos = 0;
          for (;;)
            {
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;
                default:
                  /* A lone '_' immediately followed by '"' introduces an
                     i18n string; its contents are read by the string
                     code below.  */
                  if (bufpos == 1 && buffer[0] == '_' && c == '"')
                    {
                      tp->type = token_type_i18nstring;
                      goto case_string;
                    }
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          /* Make room for the terminating NUL.  */
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_symbol;
          /* Most identifiers can be variable names; after them we must
             interpret '/' as division operator.  But for awk's builtin
             keywords we have three cases:
             (a) Must interpret '/' as division operator. "length".
             (b) Must interpret '/' as start of a regular expression.
                 "do", "exit", "print", "printf", "return".
             (c) '/' after this keyword is invalid anyway. All others.
             I used the following script for the distinction.
                for k in $awk_keywords; do
                  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
                done
           */
          if (strcmp (buffer, "do") == 0
              || strcmp (buffer, "exit") == 0
              || strcmp (buffer, "print") == 0
              || strcmp (buffer, "printf") == 0
              || strcmp (buffer, "return") == 0)
            prefer_division_over_regexp = false;
          else
            prefer_division_over_regexp = true;
          return;

        case '"':
          tp->type = token_type_string;
        case_string:
          /* Also entered via goto for _"..." strings, with tp->type
             already set to token_type_i18nstring.  Collect the decoded
             characters up to the closing quote.  */
          bufpos = 0;
          for (;;)
            {
              c = phase7_getc ();
              if (c == EOF || c == P7_QUOTES)
                break;
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
            }
          /* Make room for the terminating NUL.  */
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          prefer_division_over_regexp = true;
          return;

        case '(':
          tp->type = token_type_lparen;
          prefer_division_over_regexp = false;
          return;

        case ')':
          tp->type = token_type_rparen;
          prefer_division_over_regexp = true;
          return;

        case ',':
          tp->type = token_type_comma;
          prefer_division_over_regexp = false;
          return;

        case ';':
          tp->type = token_type_semicolon;
          prefer_division_over_regexp = false;
          return;

        case ']':
          tp->type = token_type_other;
          prefer_division_over_regexp = true;
          return;

        case '/':
          if (!prefer_division_over_regexp)
            {
              /* Regular expression.
                 Counting brackets is non-trivial. [[] is balanced, and so is
                 [\]]. Also, /[/]/ is balanced and ends at the third slash.
                 Do not count [ or ] if either one is preceded by a \.
                 A '[' should be counted if
                  a) it is the first one so far (brackets == 0), or
                  b) it is the '[' in '[:'.
                 A ']' should be counted if not preceded by a \.
                 According to POSIX, []] is how you put a ] into a set.
                 Try to handle that too.
               */
              int brackets = 0;
              bool pos0 = true;         /* true at start of regexp */
              bool pos1_open = false;   /* true after [ at start of regexp */
              bool pos2_open_not = false; /* true after [^ at start of regexp */

              /* Read through phase 1, so that '#' and '"' are not treated
                 specially inside the regular expression.  */
              for (;;)
                {
                  c = phase1_getc ();

                  if (c == EOF || c == '\n')
                    {
                      phase1_ungetc (c);
                      error_with_progname = false;
                      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
                             logical_file_name, line_number);
                      error_with_progname = true;
                      break;
                    }
                  else if (c == '[')
                    {
                      if (brackets == 0)
                        brackets++;
                      else
                        {
                          /* Inside a bracket expression, count a '[' only
                             if it is followed by ':'.  The character is
                             only peeked at.  */
                          c = phase1_getc ();
                          if (c == ':')
                            brackets++;
                          phase1_ungetc (c);
                        }
                      if (pos0)
                        {
                          pos0 = false;
                          pos1_open = true;
                          /* Skip the reset of the position flags below.  */
                          continue;
                        }
                    }
                  else if (c == ']')
                    {
                      /* A ']' right after the initial '[' or '[^' is a
                         literal member of the set and is not counted.  */
                      if (!(pos1_open || pos2_open_not))
                        brackets--;
                    }
                  else if (c == '^')
                    {
                      if (pos1_open)
                        {
                          pos1_open = false;
                          pos2_open_not = true;
                          /* Skip the reset of the position flags below.  */
                          continue;
                        }
                    }
                  else if (c == '\\')
                    {
                      /* Consume the escaped character, whatever it is.  */
                      c = phase1_getc ();
                      /* Backslash-newline is valid and ignored.  */
                    }
                  else if (c == '/')
                    {
                      /* A slash ends the regexp unless it is inside a
                         bracket expression.  */
                      if (brackets <= 0)
                        break;
                    }

                  pos0 = false;
                  pos1_open = false;
                  pos2_open_not = false;
                }

              tp->type = token_type_other;
              prefer_division_over_regexp = false;
              return;
            }
          /* Otherwise the '/' is a division operator.  */
          FALLTHROUGH;

        default:
          /* We could carefully recognize each of the 2 and 3 character
             operators, but it is not necessary, as we only need to recognize
             gettext invocations.  Don't bother.  */
          tp->type = token_type_other;
          prefer_division_over_regexp = false;
          return;
        }
    }
}
     661  
     662  
     663  /* ========================= Extracting strings.  ========================== */
     664  
     665  
/* Context lookup table.  extract_parenthesized looks up every symbol token
   in it, to obtain the context iterator for a following argument list.  */
static flag_context_list_table_ty *flag_context_list_table;


/* Maximum supported nesting depth.  Opening more parentheses than this is
   reported as the fatal error "too many open parentheses".  */
#define MAX_NESTING_DEPTH 1000

/* Current nesting depth.  Incremented by extract_parenthesized at each
   '(' and decremented again when the nested call returns at the matching
   ')'.  */
static int nesting_depth;
     675  
     676  
     677  /* The file is broken into tokens.  Scan the token stream, looking for
     678     a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid awk program, and leave the complaints about
   the grammar to the awk interpreter.
     682  
     683       Normal handling: Look for
     684         keyword ( ... msgid ... )
     685       Plural handling: Look for
     686         keyword ( ... msgid ... msgid_plural ... )
     687  
     688     We use recursion because the arguments before msgid or between msgid
     689     and msgid_plural can contain subexpressions of the same form.  */
     690  
     691  
     692  /* Extract messages until the next balanced closing parenthesis.
     693     Extracted messages are added to MLP.
     694     Return true upon eof, false upon closing parenthesis.  */
     695  static bool
     696  extract_parenthesized (message_list_ty *mlp,
     697                         flag_context_ty outer_context,
     698                         flag_context_list_iterator_ty context_iter,
     699                         struct arglist_parser *argparser)
     700  {
     701    /* Current argument number.  */
     702    int arg = 1;
     703    /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
     704    int state;
     705    /* Parameters of the keyword just seen.  Defined only in state 1.  */
     706    const struct callshapes *next_shapes = NULL;
     707    /* Whether to implicitly assume the next tokens are arguments even without
     708       a '('.  */
     709    bool next_is_argument = false;
     710    /* Context iterator that will be used if the next token is a '('.  */
     711    flag_context_list_iterator_ty next_context_iter =
     712      passthrough_context_list_iterator;
     713    /* Current context.  */
     714    flag_context_ty inner_context =
     715      inherited_context (outer_context,
     716                         flag_context_list_iterator_advance (&context_iter));
     717  
     718    /* Start state is 0.  */
     719    state = 0;
     720  
     721    for (;;)
     722      {
     723        token_ty token;
     724  
     725        x_awk_lex (&token);
     726  
     727        if (next_is_argument && token.type != token_type_lparen)
     728          {
     729            /* An argument list starts, even though there is no '('.  */
     730            context_iter = next_context_iter;
     731            outer_context = inner_context;
     732            inner_context =
     733              inherited_context (outer_context,
     734                                 flag_context_list_iterator_advance (
     735                                   &context_iter));
     736          }
     737  
     738        switch (token.type)
     739          {
     740          case token_type_symbol:
     741            {
     742              void *keyword_value;
     743  
     744              if (hash_find_entry (&keywords, token.string, strlen (token.string),
     745                                   &keyword_value)
     746                  == 0)
     747                {
     748                  next_shapes = (const struct callshapes *) keyword_value;
     749                  state = 1;
     750                }
     751              else
     752                state = 0;
     753            }
     754            next_is_argument =
     755              (strcmp (token.string, "print") == 0
     756               || strcmp (token.string, "printf") == 0);
     757            next_context_iter =
     758              flag_context_list_iterator (
     759                flag_context_list_table_lookup (
     760                  flag_context_list_table,
     761                  token.string, strlen (token.string)));
     762            free (token.string);
     763            continue;
     764  
     765          case token_type_lparen:
     766            if (++nesting_depth > MAX_NESTING_DEPTH)
     767              {
     768                error_with_progname = false;
     769                error (EXIT_FAILURE, 0, _("%s:%d: error: too many open parentheses"),
     770                       logical_file_name, line_number);
     771              }
     772            if (extract_parenthesized (mlp, inner_context, next_context_iter,
     773                                       arglist_parser_alloc (mlp,
     774                                                             state ? next_shapes : NULL)))
     775              {
     776                arglist_parser_done (argparser, arg);
     777                return true;
     778              }
     779            nesting_depth--;
     780            next_is_argument = false;
     781            next_context_iter = null_context_list_iterator;
     782            state = 0;
     783            continue;
     784  
     785          case token_type_rparen:
     786            arglist_parser_done (argparser, arg);
     787            return false;
     788  
     789          case token_type_comma:
     790            arg++;
     791            inner_context =
     792              inherited_context (outer_context,
     793                                 flag_context_list_iterator_advance (
     794                                   &context_iter));
     795            next_is_argument = false;
     796            next_context_iter = passthrough_context_list_iterator;
     797            state = 0;
     798            continue;
     799  
     800          case token_type_string:
     801            {
     802              lex_pos_ty pos;
     803              pos.file_name = logical_file_name;
     804              pos.line_number = token.line_number;
     805  
     806              if (extract_all)
     807                remember_a_message (mlp, NULL, token.string, false, false,
     808                                    inner_context, &pos,
     809                                    NULL, savable_comment, false);
     810              else
     811                {
     812                  mixed_string_ty *ms =
     813                    mixed_string_alloc_simple (token.string, lc_string,
     814                                               pos.file_name, pos.line_number);
     815                  free (token.string);
     816                  arglist_parser_remember (argparser, arg, ms,
     817                                           inner_context,
     818                                           pos.file_name, pos.line_number,
     819                                           savable_comment, false);
     820                }
     821            }
     822            next_is_argument = false;
     823            next_context_iter = null_context_list_iterator;
     824            state = 0;
     825            continue;
     826  
     827          case token_type_i18nstring:
     828            {
     829              lex_pos_ty pos;
     830              pos.file_name = logical_file_name;
     831              pos.line_number = token.line_number;
     832  
     833              remember_a_message (mlp, NULL, token.string, false, false,
     834                                  inner_context, &pos,
     835                                  NULL, savable_comment, false);
     836            }
     837            next_is_argument = false;
     838            next_context_iter = null_context_list_iterator;
     839            state = 0;
     840            continue;
     841  
     842          case token_type_semicolon:
     843            /* An argument list ends, and a new statement begins.  */
     844            /* FIXME: Should handle newline that acts as statement separator
     845               in the same way.  */
     846            /* FIXME: Instead of resetting outer_context here, it may be better
     847               to recurse in the next_is_argument handling above, waiting for
     848               the next semicolon or other statement terminator.  */
     849            outer_context = null_context;
     850            context_iter = null_context_list_iterator;
     851            next_is_argument = false;
     852            next_context_iter = passthrough_context_list_iterator;
     853            inner_context =
     854              inherited_context (outer_context,
     855                                 flag_context_list_iterator_advance (
     856                                   &context_iter));
     857            state = 0;
     858            continue;
     859  
     860          case token_type_eof:
     861            arglist_parser_done (argparser, arg);
     862            return true;
     863  
     864          case token_type_other:
     865            next_is_argument = false;
     866            next_context_iter = null_context_list_iterator;
     867            state = 0;
     868            continue;
     869  
     870          default:
     871            abort ();
     872          }
     873      }
     874  }
     875  
     876  
     877  void
     878  extract_awk (FILE *f,
     879               const char *real_filename, const char *logical_filename,
     880               flag_context_list_table_ty *flag_table,
     881               msgdomain_list_ty *mdlp)
     882  {
     883    message_list_ty *mlp = mdlp->item[0]->messages;
     884  
     885    fp = f;
     886    real_file_name = real_filename;
     887    logical_file_name = xstrdup (logical_filename);
     888    line_number = 1;
     889  
     890    last_comment_line = -1;
     891    last_non_comment_line = -1;
     892  
     893    prefer_division_over_regexp = false;
     894  
     895    flag_context_list_table = flag_table;
     896    nesting_depth = 0;
     897  
     898    init_keywords ();
     899  
     900    /* Eat tokens until eof is seen.  When extract_parenthesized returns
     901       due to an unbalanced closing parenthesis, just restart it.  */
     902    while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
     903                                   arglist_parser_alloc (mlp, NULL)))
     904      ;
     905  
     906    fp = NULL;
     907    real_file_name = NULL;
     908    logical_file_name = NULL;
     909    line_number = 0;
     910  }