1  /* xgettext librep backend.
       2     Copyright (C) 2001-2003, 2005-2009, 2018-2023 Free Software Foundation, Inc.
       3  
       4     This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
       5  
       6     This program is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3 of the License, or
       9     (at your option) any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifdef HAVE_CONFIG_H
      20  # include "config.h"
      21  #endif
      22  
      23  /* Specification.  */
      24  #include "x-librep.h"
      25  
      26  #include <errno.h>
      27  #include <stdbool.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  
      32  #include "attribute.h"
      33  #include "c-ctype.h"
      34  #include "message.h"
      35  #include "xgettext.h"
      36  #include "xg-pos.h"
      37  #include "xg-mixed-string.h"
      38  #include "xg-arglist-context.h"
      39  #include "xg-arglist-callshape.h"
      40  #include "xg-arglist-parser.h"
      41  #include "xg-message.h"
      42  #include "error.h"
      43  #include "error-progname.h"
      44  #include "xalloc.h"
      45  #include "mem-hash-map.h"
      46  #include "gettext.h"
      47  
      48  #define _(s) gettext(s)
      49  
      50  
      51  /* Summary of librep syntax:
      52     - ';' starts a comment until end of line.
      53     - Block comments start with '#|' and end with '|#'.
      54     - Numbers are constituted of an optional prefix (#b, #B for binary,
      55       #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
      56       #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
      57       the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
      60     - Strings are delimited by double quotes. Backslash introduces an escape
      61       sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
      62       '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
      63     - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
      64       if preceded by backslash or enclosed in |...|.
      65     - Keywords: written as #:SYMBOL.
      66     - () delimit lists.
      67     - [] delimit vectors.
      68     The reader is implemented in librep-0.14/src/lisp.c.  */
      69  
      70  
      71  /* ====================== Keyword set customization.  ====================== */
      72  
/* If true extract all strings.  Set by x_librep_extract_all and consulted
   each time a string literal has been read.  */
static bool extract_all = false;

/* Maps keyword symbol names to their call shapes.  The table is created
   lazily, on the first non-NULL call of x_librep_keyword.  */
static hash_table keywords;
/* True as long as the built-in keyword set still has to be installed.
   Cleared by x_librep_keyword (NULL) and by init_keywords.  */
static bool default_keywords = true;
      78  
      79  
      80  void
      81  x_librep_extract_all ()
      82  {
      83    extract_all = true;
      84  }
      85  
      86  
      87  void
      88  x_librep_keyword (const char *name)
      89  {
      90    if (name == NULL)
      91      default_keywords = false;
      92    else
      93      {
      94        const char *end;
      95        struct callshape shape;
      96        const char *colon;
      97  
      98        if (keywords.table == NULL)
      99          hash_init (&keywords, 100);
     100  
     101        split_keywordspec (name, &end, &shape);
     102  
     103        /* The characters between name and end should form a valid Lisp
     104           symbol.  */
     105        colon = strchr (name, ':');
     106        if (colon == NULL || colon >= end)
     107          insert_keyword_callshape (&keywords, name, end - name, &shape);
     108      }
     109  }
     110  
     111  /* Finish initializing the keywords hash table.
     112     Called after argument processing, before each file is processed.  */
     113  static void
     114  init_keywords ()
     115  {
     116    if (default_keywords)
     117      {
     118        /* When adding new keywords here, also update the documentation in
     119           xgettext.texi!  */
     120        x_librep_keyword ("_");
     121        default_keywords = false;
     122      }
     123  }
     124  
     125  void
     126  init_flag_table_librep ()
     127  {
     128    xgettext_record_flag ("_:1:pass-librep-format");
     129    xgettext_record_flag ("format:2:librep-format");
     130  }
     131  
     132  
     133  /* ======================== Reading of characters.  ======================== */
     134  
/* The input file stream.  do_getc reads from it and do_ungetc pushes
   characters back onto it.  */
static FILE *fp;
     137  
     138  
     139  /* Fetch the next character from the input file.  */
     140  static int
     141  do_getc ()
     142  {
     143    int c = getc (fp);
     144  
     145    if (c == EOF)
     146      {
     147        if (ferror (fp))
     148          error (EXIT_FAILURE, errno,
     149                 _("error while reading \"%s\""), real_file_name);
     150      }
     151    else if (c == '\n')
     152     line_number++;
     153  
     154    return c;
     155  }
     156  
     157  /* Put back the last fetched character, not EOF.  */
     158  static void
     159  do_ungetc (int c)
     160  {
     161    if (c == '\n')
     162      line_number--;
     163    ungetc (c, fp);
     164  }
     165  
     166  
     167  /* ========================== Reading of tokens.  ========================== */
     168  
     169  
/* A token consists of a sequence of characters.
   The character array is not NUL terminated; 'charcount' gives its
   length.  */
struct token
{
  int allocated;                /* number of allocated chars */
  int charcount;                /* number of used chars */
  char *chars;                  /* the token's constituents */
};
     177  
     178  /* Initialize a 'struct token'.  */
     179  static inline void
     180  init_token (struct token *tp)
     181  {
     182    tp->allocated = 10;
     183    tp->chars = XNMALLOC (tp->allocated, char);
     184    tp->charcount = 0;
     185  }
     186  
     187  /* Free the memory pointed to by a 'struct token'.  */
     188  static inline void
     189  free_token (struct token *tp)
     190  {
     191    free (tp->chars);
     192  }
     193  
     194  /* Ensure there is enough room in the token for one more character.  */
     195  static inline void
     196  grow_token (struct token *tp)
     197  {
     198    if (tp->charcount == tp->allocated)
     199      {
     200        tp->allocated *= 2;
     201        tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
     202      }
     203  }
     204  
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.

   *TP is initialized here through init_token; this function never frees
   it.  The character that ends the token is pushed back with do_ungetc
   (unless it is EOF), so that the next read sees it again.

   While the characters are collected, the function speculatively tracks
   whether they can still spell a number.  The final verdict depends only
   on 'radix' and 'nfirst', see label 'done'.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing:  */
  /* -1: nothing decided yet.  0: the token cannot be a number.
     1: a leading '0' was seen, the base is still open.
     Other positive values: the base (2, 8, 10 or 16).  */
  int radix = -1;
  /* Token index up to which only prefix and sign characters have been
     seen; a sign or '#' is only recognized at exactly this index.  */
  int nfirst = 0;
  /* Cleared once a '.' or an exponent marker has been accepted.  */
  bool exact = true;
  /* Set once a '/' has been accepted.  */
  bool rational = false;
  /* Set once an 'E' or 'e' exponent marker has been accepted.  */
  bool exponent = false;
  /* Set once a '+' or '-' has been accepted as sign.  */
  bool had_sign = false;
  /* Set when a '#' was just seen: the next character is the prefix
     letter.  */
  bool expecting_prefix = false;

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and these punctuation characters terminate the
           token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* A backslash quotes the following character.  Such a token is
             never a number.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* Everything up to the closing '|' (or EOF) is taken verbatim.
             Such a token is never a number.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              /* The token may still turn out to be a number.  */
              if (expecting_prefix)
                {
                  /* C is the letter after '#'.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      /* Exactness prefixes leave the radix untouched.  */
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  if (c == '#')
                    {
                      /* A '#' prefix is not accepted after a sign.  */
                      if (had_sign)
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First character that is neither prefix nor
                         sign.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character following a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* The base is known; check that C fits.  */
                      switch (c)
                        {
                        case '.':
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other bases 'E'/'e' is checked like any
                             other digit character.  */
                          FALLTHROUGH;
                        default:
                          /* A sign is tolerated once an exponent marker
                             has been seen.  */
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* Already known to be a symbol: a '#' ends it.  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  if (c != EOF)
    do_ungetc (c);
  /* A number needs a known base and at least one character beyond the
     prefix/sign part.  */
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
     407  
     408  
     409  /* ========================= Accumulating comments ========================= */
     410  
     411  
/* Accumulator for the text of the comment line being read.
   buffer points to the storage, bufmax is its allocated size in bytes and
   buflen the number of bytes currently in use.  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
     415  
     416  static inline void
     417  comment_start ()
     418  {
     419    buflen = 0;
     420  }
     421  
     422  static inline void
     423  comment_add (int c)
     424  {
     425    if (buflen >= bufmax)
     426      {
     427        bufmax = 2 * bufmax + 10;
     428        buffer = xrealloc (buffer, bufmax);
     429      }
     430    buffer[buflen++] = c;
     431  }
     432  
     433  static inline void
     434  comment_line_end (size_t chars_to_remove)
     435  {
     436    buflen -= chars_to_remove;
     437    while (buflen >= 1
     438           && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
     439      --buflen;
     440    if (chars_to_remove == 0 && buflen >= bufmax)
     441      {
     442        bufmax = 2 * bufmax + 10;
     443        buffer = xrealloc (buffer, bufmax);
     444      }
     445    buffer[buflen] = '\0';
     446    savable_comment_add (buffer);
     447  }
     448  
     449  
/* These are for tracking whether comments count as immediately before
   keyword.
   last_comment_line is the line on which the most recent ';' comment
   started; last_non_comment_line is the line on which the most recent
   object ended.  When a newline is read while the latter is greater than
   the former, the saved comments are discarded.  */
static int last_comment_line;
static int last_non_comment_line;
     454  
     455  
     456  /* ========================= Accumulating messages ========================= */
     457  
     458  
/* The message list that receives the extracted messages.  It is passed
   to arglist_parser_alloc and remember_a_message.
   NOTE(review): presumably set by the extraction entry point before
   parsing starts - not visible in this part of the file.  */
static message_list_ty *mlp;
     460  
     461  
     462  /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
     463  
     464  
/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
     476  
/* The result of reading one object.  For t_symbol and t_string the
   'token' member points to a heap-allocated token which free_object
   releases; for the other types it is not used by free_object.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
     483  
     484  /* Free the memory pointed to by a 'struct object'.  */
     485  static inline void
     486  free_object (struct object *op)
     487  {
     488    if (op->type == t_symbol || op->type == t_string)
     489      {
     490        free_token (op->token);
     491        free (op->token);
     492      }
     493  }
     494  
     495  /* Convert a t_symbol/t_string token to a char*.  */
     496  static char *
     497  string_of_object (const struct object *op)
     498  {
     499    char *str;
     500    int n;
     501  
     502    if (!(op->type == t_symbol || op->type == t_string))
     503      abort ();
     504    n = op->token->charcount;
     505    str = XNMALLOC (n + 1, char);
     506    memcpy (str, op->token->chars, n);
     507    str[n] = '\0';
     508    return str;
     509  }
     510  
     511  
/* Context lookup table.  Queried with the symbol in function position to
   obtain the per-argument flag contexts.
   NOTE(review): presumably assigned by the extraction entry point - not
   visible in this part of the file.  */
static flag_context_list_table_ty *flag_context_list_table;
     514  
     515  
/* Maximum supported nesting depth.  read_object exits with a fatal error
   when nesting_depth exceeds this value.  */
#define MAX_NESTING_DEPTH 1000

/* Current nesting depth.  Incremented around every recursive call of
   read_object and decremented again afterwards.  */
static int nesting_depth;
     521  
     522  
     523  /* Returns the character represented by an escape sequence.  */
     524  static int
     525  do_getc_escaped (int c)
     526  {
     527    switch (c)
     528      {
     529      case 'n':
     530        return '\n';
     531      case 'r':
     532        return '\r';
     533      case 'f':
     534        return '\f';
     535      case 't':
     536        return '\t';
     537      case 'v':
     538        return '\v';
     539      case 'a':
     540        return '\a';
     541      case '^':
     542        c = do_getc ();
     543        if (c == EOF)
     544          return EOF;
     545        return c & 0x1f;
     546      case '0': case '1': case '2': case '3': case '4':
     547      case '5': case '6': case '7':
     548        {
     549          int n = c - '0';
     550  
     551          c = do_getc ();
     552          if (c != EOF)
     553            {
     554              if (c >= '0' && c <= '7')
     555                {
     556                  n = (n << 3) + (c - '0');
     557                  c = do_getc ();
     558                  if (c != EOF)
     559                    {
     560                      if (c >= '0' && c <= '7')
     561                        n = (n << 3) + (c - '0');
     562                      else
     563                        do_ungetc (c);
     564                    }
     565                }
     566              else
     567                do_ungetc (c);
     568            }
     569          return (unsigned char) n;
     570        }
     571      case 'x':
     572        {
     573          int n = 0;
     574  
     575          for (;;)
     576            {
     577              c = do_getc ();
     578              if (c == EOF)
     579                break;
     580              else if (c >= '0' && c <= '9')
     581                n = (n << 4) + (c - '0');
     582              else if (c >= 'A' && c <= 'F')
     583                n = (n << 4) + (c - 'A' + 10);
     584              else if (c >= 'a' && c <= 'f')
     585                n = (n << 4) + (c - 'a' + 10);
     586              else
     587                {
     588                  do_ungetc (c);
     589                  break;
     590                }
     591            }
     592          return (unsigned char) n;
     593        }
     594      default:
     595        return c;
     596      }
     597  }
     598  
     599  /* Read the next object.  */
     600  static void
     601  read_object (struct object *op, flag_context_ty outer_context)
     602  {
     603    if (nesting_depth > MAX_NESTING_DEPTH)
     604      {
     605        error_with_progname = false;
     606        error (EXIT_FAILURE, 0, _("%s:%d: error: too deeply nested objects"),
     607               logical_file_name, line_number);
     608      }
     609    for (;;)
     610      {
     611        int ch;
     612  
     613        ch = do_getc ();
     614  
     615        switch (ch)
     616          {
     617          case EOF:
     618            op->type = t_eof;
     619            return;
     620  
     621          case '\n':
     622            /* Comments assumed to be grouped with a message must immediately
     623               precede it, with no non-whitespace token on a line between
     624               both.  */
     625            if (last_non_comment_line > last_comment_line)
     626              savable_comment_reset ();
     627            continue;
     628  
     629          case ' ': case '\t': case '\f': case '\r':
     630            continue;
     631  
     632          case '(':
     633            {
     634              int arg = 0;                /* Current argument number.  */
     635              flag_context_list_iterator_ty context_iter;
     636              const struct callshapes *shapes = NULL;
     637              struct arglist_parser *argparser = NULL;
     638  
     639              for (;; arg++)
     640                {
     641                  struct object inner;
     642                  flag_context_ty inner_context;
     643  
     644                  if (arg == 0)
     645                    inner_context = null_context;
     646                  else
     647                    inner_context =
     648                      inherited_context (outer_context,
     649                                         flag_context_list_iterator_advance (
     650                                           &context_iter));
     651  
     652                  ++nesting_depth;
     653                  read_object (&inner, inner_context);
     654                  nesting_depth--;
     655  
     656                  /* Recognize end of list.  */
     657                  if (inner.type == t_close)
     658                    {
     659                      op->type = t_other;
     660                      /* Don't bother converting "()" to "NIL".  */
     661                      last_non_comment_line = line_number;
     662                      if (argparser != NULL)
     663                        arglist_parser_done (argparser, arg);
     664                      return;
     665                    }
     666  
     667                  /* Dots are not allowed in every position.
     668                     But be tolerant.  */
     669  
     670                  /* EOF inside list is illegal.  But be tolerant.  */
     671                  if (inner.type == t_eof)
     672                    break;
     673  
     674                  if (arg == 0)
     675                    {
     676                      /* This is the function position.  */
     677                      if (inner.type == t_symbol)
     678                        {
     679                          char *symbol_name = string_of_object (&inner);
     680                          void *keyword_value;
     681  
     682                          if (hash_find_entry (&keywords,
     683                                               symbol_name, strlen (symbol_name),
     684                                               &keyword_value)
     685                              == 0)
     686                            shapes = (const struct callshapes *) keyword_value;
     687  
     688                          argparser = arglist_parser_alloc (mlp, shapes);
     689  
     690                          context_iter =
     691                            flag_context_list_iterator (
     692                              flag_context_list_table_lookup (
     693                                flag_context_list_table,
     694                                symbol_name, strlen (symbol_name)));
     695  
     696                          free (symbol_name);
     697                        }
     698                      else
     699                        context_iter = null_context_list_iterator;
     700                    }
     701                  else
     702                    {
     703                      /* These are the argument positions.  */
     704                      if (argparser != NULL && inner.type == t_string)
     705                        {
     706                          char *s = string_of_object (&inner);
     707                          mixed_string_ty *ms =
     708                            mixed_string_alloc_simple (s, lc_string,
     709                                                       logical_file_name,
     710                                                       inner.line_number_at_start);
     711                          free (s);
     712                          arglist_parser_remember (argparser, arg, ms,
     713                                                   inner_context,
     714                                                   logical_file_name,
     715                                                   inner.line_number_at_start,
     716                                                   savable_comment, false);
     717                        }
     718                    }
     719  
     720                  free_object (&inner);
     721                }
     722  
     723              if (argparser != NULL)
     724                arglist_parser_done (argparser, arg);
     725            }
     726            op->type = t_other;
     727            last_non_comment_line = line_number;
     728            return;
     729  
     730          case '[':
     731            {
     732              for (;;)
     733                {
     734                  struct object inner;
     735  
     736                  ++nesting_depth;
     737                  read_object (&inner, null_context);
     738                  nesting_depth--;
     739  
     740                  /* Recognize end of vector.  */
     741                  if (inner.type == t_close)
     742                    {
     743                      op->type = t_other;
     744                      last_non_comment_line = line_number;
     745                      return;
     746                    }
     747  
     748                  /* Dots are not allowed.  But be tolerant.  */
     749  
     750                  /* EOF inside vector is illegal.  But be tolerant.  */
     751                  if (inner.type == t_eof)
     752                    break;
     753  
     754                  free_object (&inner);
     755                }
     756            }
     757            op->type = t_other;
     758            last_non_comment_line = line_number;
     759            return;
     760  
     761          case ')': case ']':
     762            /* Tell the caller about the end of list or vector.
     763               Unmatched closing parenthesis is illegal.  But be tolerant.  */
     764            op->type = t_close;
     765            last_non_comment_line = line_number;
     766            return;
     767  
     768          case ',':
     769            {
     770              int c = do_getc ();
     771              /* The ,@ handling inside lists is wrong anyway, because
     772                 ,@form expands to an unknown number of elements.  */
     773              if (c != EOF && c != '@')
     774                do_ungetc (c);
     775            }
     776            FALLTHROUGH;
     777          case '\'':
     778          case '`':
     779            {
     780              struct object inner;
     781  
     782              ++nesting_depth;
     783              read_object (&inner, null_context);
     784              nesting_depth--;
     785  
     786              /* Dots and EOF are not allowed here.  But be tolerant.  */
     787  
     788              free_object (&inner);
     789  
     790              op->type = t_other;
     791              last_non_comment_line = line_number;
     792              return;
     793            }
     794  
     795          case ';':
     796            {
     797              bool all_semicolons = true;
     798  
     799              last_comment_line = line_number;
     800              comment_start ();
     801              for (;;)
     802                {
     803                  int c = do_getc ();
     804                  if (c == EOF || c == '\n' || c == '\f' || c == '\r')
     805                    break;
     806                  if (c != ';')
     807                    all_semicolons = false;
     808                  if (!all_semicolons)
     809                    {
     810                      /* We skip all leading white space, but not EOLs.  */
     811                      if (!(buflen == 0 && (c == ' ' || c == '\t')))
     812                        comment_add (c);
     813                    }
     814                }
     815              comment_line_end (0);
     816              continue;
     817            }
     818  
     819          case '"':
     820            {
     821              op->token = XMALLOC (struct token);
     822              init_token (op->token);
     823              op->line_number_at_start = line_number;
     824              for (;;)
     825                {
     826                  int c = do_getc ();
     827                  if (c == EOF)
     828                    /* Invalid input.  Be tolerant, no error message.  */
     829                    break;
     830                  if (c == '"')
     831                    break;
     832                  if (c == '\\')
     833                    {
     834                      c = do_getc ();
     835                      if (c == EOF)
     836                        /* Invalid input.  Be tolerant, no error message.  */
     837                        break;
     838                      if (c == '\n')
     839                        /* Ignore escaped newline.  */
     840                        ;
     841                      else
     842                        {
     843                          c = do_getc_escaped (c);
     844                          if (c == EOF)
     845                            /* Invalid input.  Be tolerant, no error message.  */
     846                            break;
     847                          grow_token (op->token);
     848                          op->token->chars[op->token->charcount++] = c;
     849                        }
     850                    }
     851                  else
     852                    {
     853                      grow_token (op->token);
     854                      op->token->chars[op->token->charcount++] = c;
     855                    }
     856                }
     857              op->type = t_string;
     858  
     859              if (extract_all)
     860                {
     861                  lex_pos_ty pos;
     862  
     863                  pos.file_name = logical_file_name;
     864                  pos.line_number = op->line_number_at_start;
     865                  remember_a_message (mlp, NULL, string_of_object (op), false,
     866                                      false, null_context, &pos,
     867                                      NULL, savable_comment, false);
     868                }
     869              last_non_comment_line = line_number;
     870              return;
     871            }
     872  
     873          case '?':
     874            {
     875              int c = do_getc ();
     876              if (c == EOF)
     877                /* Invalid input.  Be tolerant, no error message.  */
     878                ;
     879              else if (c == '\\')
     880                {
     881                  c = do_getc ();
     882                  if (c == EOF)
     883                    /* Invalid input.  Be tolerant, no error message.  */
     884                    ;
     885                  else
     886                    {
     887                      c = do_getc_escaped (c);
     888                      if (c == EOF)
     889                        /* Invalid input.  Be tolerant, no error message.  */
     890                        ;
     891                    }
     892                }
     893              op->type = t_other;
     894              last_non_comment_line = line_number;
     895              return;
     896            }
     897  
     898          case '#':
     899            /* Dispatch macro handling.  */
     900            {
     901              int dmc = do_getc ();
     902              if (dmc == EOF)
     903                /* Invalid input.  Be tolerant, no error message.  */
     904                {
     905                  op->type = t_other;
     906                  return;
     907                }
     908  
     909              switch (dmc)
     910                {
     911                case '!':
     912                  if (ftell (fp) == 2)
     913                    /* Skip comment until !# */
     914                    {
     915                      int c;
     916  
     917                      c = do_getc ();
     918                      for (;;)
     919                        {
     920                          if (c == EOF)
     921                            break;
     922                          if (c == '!')
     923                            {
     924                              c = do_getc ();
     925                              if (c == EOF || c == '#')
     926                                break;
     927                            }
     928                          else
     929                            c = do_getc ();
     930                        }
     931                      if (c == EOF)
     932                        {
     933                          /* EOF not allowed here.  But be tolerant.  */
     934                          op->type = t_eof;
     935                          return;
     936                        }
     937                      continue;
     938                    }
     939                  FALLTHROUGH;
     940                case '\'':
     941                case ':':
     942                  {
     943                    struct object inner;
     944                    ++nesting_depth;
     945                    read_object (&inner, null_context);
     946                    nesting_depth--;
     947                    /* Dots and EOF are not allowed here.
     948                       But be tolerant.  */
     949                    free_object (&inner);
     950                    op->type = t_other;
     951                    last_non_comment_line = line_number;
     952                    return;
     953                  }
     954  
     955                case '[':
     956                case '(':
     957                  {
     958                    struct object inner;
     959                    do_ungetc (dmc);
     960                    ++nesting_depth;
     961                    read_object (&inner, null_context);
     962                    nesting_depth--;
     963                    /* Dots and EOF are not allowed here.
     964                       But be tolerant.  */
     965                    free_object (&inner);
     966                    op->type = t_other;
     967                    last_non_comment_line = line_number;
     968                    return;
     969                  }
     970  
     971                case '|':
     972                  {
     973                    int depth = 0;
     974                    int c;
     975  
     976                    comment_start ();
     977                    c = do_getc ();
     978                    for (;;)
     979                      {
     980                        if (c == EOF)
     981                          break;
     982                        if (c == '|')
     983                          {
     984                            c = do_getc ();
     985                            if (c == EOF)
     986                              break;
     987                            if (c == '#')
     988                              {
     989                                if (depth == 0)
     990                                  {
     991                                    comment_line_end (0);
     992                                    break;
     993                                  }
     994                                depth--;
     995                                comment_add ('|');
     996                                comment_add ('#');
     997                                c = do_getc ();
     998                              }
     999                            else
    1000                              comment_add ('|');
    1001                          }
    1002                        else if (c == '#')
    1003                          {
    1004                            c = do_getc ();
    1005                            if (c == EOF)
    1006                              break;
    1007                            comment_add ('#');
    1008                            if (c == '|')
    1009                              {
    1010                                depth++;
    1011                                comment_add ('|');
    1012                                c = do_getc ();
    1013                              }
    1014                          }
    1015                        else
    1016                          {
    1017                            /* We skip all leading white space.  */
    1018                            if (!(buflen == 0 && (c == ' ' || c == '\t')))
    1019                              comment_add (c);
    1020                            if (c == '\n')
    1021                              {
    1022                                comment_line_end (1);
    1023                                comment_start ();
    1024                              }
    1025                            c = do_getc ();
    1026                          }
    1027                      }
    1028                    if (c == EOF)
    1029                      {
    1030                        /* EOF not allowed here.  But be tolerant.  */
    1031                        op->type = t_eof;
    1032                        return;
    1033                      }
    1034                    last_comment_line = line_number;
    1035                    continue;
    1036                  }
    1037  
    1038                case '\\':
    1039                  {
    1040                    struct token token;
    1041                    int first = '\\';
    1042                    read_token (&token, &first);
    1043                    free_token (&token);
    1044                    op->type = t_other;
    1045                    last_non_comment_line = line_number;
    1046                    return;
    1047                  }
    1048  
    1049                case 'T': case 't':
    1050                case 'F': case 'f':
    1051                  op->type = t_other;
    1052                  last_non_comment_line = line_number;
    1053                  return;
    1054  
    1055                case 'B': case 'b':
    1056                case 'O': case 'o':
    1057                case 'D': case 'd':
    1058                case 'X': case 'x':
    1059                case 'E': case 'e':
    1060                case 'I': case 'i':
    1061                  {
    1062                    struct token token;
    1063                    do_ungetc (dmc);
    1064                    {
    1065                      int c;
    1066                      c = '#';
    1067                      read_token (&token, &c);
    1068                      free_token (&token);
    1069                    }
    1070                    op->type = t_other;
    1071                    last_non_comment_line = line_number;
    1072                    return;
    1073                  }
    1074  
    1075                default:
    1076                  /* Invalid input.  Be tolerant, no error message.  */
    1077                  op->type = t_other;
    1078                  last_non_comment_line = line_number;
    1079                  return;
    1080                }
    1081  
    1082              /*NOTREACHED*/
    1083              abort ();
    1084            }
    1085  
    1086          default:
    1087            /* Read a token.  */
    1088            {
    1089              bool symbol;
    1090  
    1091              op->token = XMALLOC (struct token);
    1092              symbol = read_token (op->token, &ch);
    1093              if (op->token->charcount == 1 && op->token->chars[0] == '.')
    1094                {
    1095                  free_token (op->token);
    1096                  free (op->token);
    1097                  op->type = t_dot;
    1098                  last_non_comment_line = line_number;
    1099                  return;
    1100                }
    1101              if (!symbol)
    1102                {
    1103                  free_token (op->token);
    1104                  free (op->token);
    1105                  op->type = t_other;
    1106                  last_non_comment_line = line_number;
    1107                  return;
    1108                }
    1109              /* Distinguish between "foo" and "foo#bar".  */
    1110              {
    1111                int c = do_getc ();
    1112                if (c == '#')
    1113                  {
    1114                    struct token second_token;
    1115  
    1116                    free_token (op->token);
    1117                    free (op->token);
    1118                    read_token (&second_token, NULL);
    1119                    free_token (&second_token);
    1120                    op->type = t_other;
    1121                    last_non_comment_line = line_number;
    1122                    return;
    1123                  }
    1124                else
    1125                  {
    1126                    if (c != EOF)
    1127                      do_ungetc (c);
    1128                    op->type = t_symbol;
    1129                    last_non_comment_line = line_number;
    1130                    return;
    1131                  }
    1132              }
    1133            }
    1134          }
    1135      }
    1136  }
    1137  
    1138  
    1139  void
    1140  extract_librep (FILE *f,
    1141                  const char *real_filename, const char *logical_filename,
    1142                  flag_context_list_table_ty *flag_table,
    1143                  msgdomain_list_ty *mdlp)
    1144  {
    1145    mlp = mdlp->item[0]->messages;
    1146  
    1147    fp = f;
    1148    real_file_name = real_filename;
    1149    logical_file_name = xstrdup (logical_filename);
    1150    line_number = 1;
    1151  
    1152    last_comment_line = -1;
    1153    last_non_comment_line = -1;
    1154  
    1155    flag_context_list_table = flag_table;
    1156    nesting_depth = 0;
    1157  
    1158    init_keywords ();
    1159  
    1160    /* Eat tokens until eof is seen.  When read_object returns
    1161       due to an unbalanced closing parenthesis, just restart it.  */
    1162    do
    1163      {
    1164        struct object toplevel_object;
    1165  
    1166        read_object (&toplevel_object, null_context);
    1167  
    1168        if (toplevel_object.type == t_eof)
    1169          break;
    1170  
    1171        free_object (&toplevel_object);
    1172      }
    1173    while (!feof (fp));
    1174  
    1175    /* Close scanner.  */
    1176    fp = NULL;
    1177    real_file_name = NULL;
    1178    logical_file_name = NULL;
    1179    line_number = 0;
    1180  }