1  /* Reading NeXTstep/GNUstep .strings files.
       2     Copyright (C) 2003, 2005-2007, 2009, 2019-2020, 2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2003.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  /* Specification.  */
      23  #include "read-stringtable.h"
      24  
      25  #include <assert.h>
      26  #include <errno.h>
      27  #include <stdbool.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  
      32  #include "attribute.h"
      33  #include "error.h"
      34  #include "error-progname.h"
      35  #include "read-catalog-abstract.h"
      36  #include "xalloc.h"
      37  #include "xvasprintf.h"
      38  #include "po-xerror.h"
      39  #include "unistr.h"
      40  #include "gettext.h"
      41  
      42  #define _(str) gettext (str)
      43  
      44  /* The format of NeXTstep/GNUstep .strings files is documented in
      45       gnustep-base-1.8.0/Tools/make_strings/Using.txt
      46     and in the comments of method propertyListFromStringsFileFormat in
      47       gnustep-base-1.8.0/Source/NSString.m
   In summary, it's an Objective-C-like file with pseudo-assignments of the form
      49            "key" = "value";
      50     where the key is the msgid and the value is the msgstr.
      51  
      52     The implementation of the parser of .strings files is in
      53       gnustep-base-1.8.0/Source/NSString.m
      54       function GSPropertyListFromStringsFormat
      55       (indirectly called from NSBundle's method localizedStringForKey).
      56  
      57     A test case is in
      58       gnustep-base-1.8.0/Testing/English.lproj/NXStringTable.example
      59   */
      60  
      61  /* Handling of comments: We copy all comments from the .strings file to
      62     the PO file. This is not really needed; it's a service for translators
      63     who don't like PO files and prefer to maintain the .strings file.  */
      64  
      65  
/* Real filename, used in error messages about the input file.
   Set on entry to stringtable_parse and reset to NULL when it returns.  */
static const char *real_file_name;

/* File name and line number of the current read position.  Defined in
   another translation unit; stringtable_parse initializes it, and
   phase3_getc / phase3_ungetc adjust its line_number for each newline.  */
extern lex_pos_ty gram_pos;

/* The input file stream.  Set on entry to stringtable_parse and reset to
   NULL when it returns.  */
static FILE *fp;
      74  
      75  
      76  /* Phase 1: Read a byte.
      77     Max. 4 pushback characters.  */
      78  
      79  static unsigned char phase1_pushback[4];
      80  static int phase1_pushback_length;
      81  
      82  static int
      83  phase1_getc ()
      84  {
      85    int c;
      86  
      87    if (phase1_pushback_length)
      88      return phase1_pushback[--phase1_pushback_length];
      89  
      90    c = getc (fp);
      91  
      92    if (c == EOF)
      93      {
      94        if (ferror (fp))
      95          {
      96            const char *errno_description = strerror (errno);
      97            po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
      98                       xasprintf ("%s: %s",
      99                                  xasprintf (_("error while reading \"%s\""),
     100                                             real_file_name),
     101                                  errno_description));
     102          }
     103        return EOF;
     104      }
     105  
     106    return c;
     107  }
     108  
     109  static void
     110  phase1_ungetc (int c)
     111  {
     112    if (c != EOF)
     113      phase1_pushback[phase1_pushback_length++] = c;
     114  }
     115  
     116  
     117  /* Phase 2: Read an UCS-4 character.
     118     Max. 2 pushback characters.  */
     119  
/* End-of-file indicator for functions returning an UCS-4 character.  */
#define UEOF -1

/* Stack of characters handed back through phase2_ungetc.  phase2_getc
   pops from it before it reads any new input.  */
static int phase2_pushback[4];
static int phase2_pushback_length;

/* The input file can be in Unicode encoding (UCS-2BE, UCS-2LE, UTF-8, each
   with a BOM!), or otherwise the locale-dependent default encoding is used.
   Since we don't want to depend on the locale here, we use ISO-8859-1
   instead.  */
enum enc
{
  enc_undetermined,     /* Nothing decided yet; phase2_getc looks for a BOM.  */
  enc_ucs2be,           /* Selected by the byte sequence FE FF.  */
  enc_ucs2le,           /* Selected by the byte sequence FF FE.  */
  enc_utf8,             /* Selected by the byte sequence EF BB BF.  */
  enc_iso8859_1         /* Fallback when none of the above is found.  */
};
/* Encoding of the file being read.  stringtable_parse resets it to
   enc_undetermined for every file.  */
static enum enc encoding;
     139  
     140  static int
     141  phase2_getc ()
     142  {
     143    if (phase2_pushback_length)
     144      return phase2_pushback[--phase2_pushback_length];
     145  
     146    if (encoding == enc_undetermined)
     147      {
     148        /* Determine the input file's encoding.  */
     149        int c0, c1;
     150  
     151        c0 = phase1_getc ();
     152        if (c0 == EOF)
     153          return UEOF;
     154        c1 = phase1_getc ();
     155        if (c1 == EOF)
     156          {
     157            phase1_ungetc (c0);
     158            encoding = enc_iso8859_1;
     159          }
     160        else if (c0 == 0xfe && c1 == 0xff)
     161          encoding = enc_ucs2be;
     162        else if (c0 == 0xff && c1 == 0xfe)
     163          encoding = enc_ucs2le;
     164        else
     165          {
     166            int c2;
     167  
     168            c2 = phase1_getc ();
     169            if (c2 == EOF)
     170              {
     171                phase1_ungetc (c1);
     172                phase1_ungetc (c0);
     173                encoding = enc_iso8859_1;
     174              }
     175            else if (c0 == 0xef && c1 == 0xbb && c2 == 0xbf)
     176              encoding = enc_utf8;
     177            else
     178              {
     179                phase1_ungetc (c2);
     180                phase1_ungetc (c1);
     181                phase1_ungetc (c0);
     182                encoding = enc_iso8859_1;
     183              }
     184          }
     185      }
     186  
     187    switch (encoding)
     188      {
     189      case enc_ucs2be:
     190        /* Read an UCS-2BE encoded character.  */
     191        {
     192          int c0, c1;
     193  
     194          c0 = phase1_getc ();
     195          if (c0 == EOF)
     196            return UEOF;
     197          c1 = phase1_getc ();
     198          if (c1 == EOF)
     199            return UEOF;
     200          return (c0 << 8) + c1;
     201        }
     202  
     203      case enc_ucs2le:
     204        /* Read an UCS-2LE encoded character.  */
     205        {
     206          int c0, c1;
     207  
     208          c0 = phase1_getc ();
     209          if (c0 == EOF)
     210            return UEOF;
     211          c1 = phase1_getc ();
     212          if (c1 == EOF)
     213            return UEOF;
     214          return c0 + (c1 << 8);
     215        }
     216  
     217      case enc_utf8:
     218        /* Read an UTF-8 encoded character.  */
     219        {
     220          unsigned char buf[6];
     221          unsigned int count;
     222          int c;
     223          ucs4_t uc;
     224  
     225          c = phase1_getc ();
     226          if (c == EOF)
     227            return UEOF;
     228          buf[0] = c;
     229          count = 1;
     230  
     231          if (buf[0] >= 0xc0)
     232            {
     233              c = phase1_getc ();
     234              if (c == EOF)
     235                return UEOF;
     236              buf[1] = c;
     237              count = 2;
     238  
     239              if (buf[0] >= 0xe0
     240                  && ((buf[1] ^ 0x80) < 0x40))
     241                {
     242                  c = phase1_getc ();
     243                  if (c == EOF)
     244                    return UEOF;
     245                  buf[2] = c;
     246                  count = 3;
     247  
     248                  if (buf[0] >= 0xf0
     249                      && ((buf[2] ^ 0x80) < 0x40))
     250                    {
     251                      c = phase1_getc ();
     252                      if (c == EOF)
     253                        return UEOF;
     254                      buf[3] = c;
     255                      count = 4;
     256  
     257                      if (buf[0] >= 0xf8
     258                          && ((buf[3] ^ 0x80) < 0x40))
     259                        {
     260                          c = phase1_getc ();
     261                          if (c == EOF)
     262                            return UEOF;
     263                          buf[4] = c;
     264                          count = 5;
     265  
     266                          if (buf[0] >= 0xfc
     267                              && ((buf[4] ^ 0x80) < 0x40))
     268                            {
     269                              c = phase1_getc ();
     270                              if (c == EOF)
     271                                return UEOF;
     272                              buf[5] = c;
     273                              count = 6;
     274                            }
     275                        }
     276                    }
     277                }
     278            }
     279  
     280          u8_mbtouc (&uc, buf, count);
     281          return uc;
     282        }
     283  
     284      case enc_iso8859_1:
     285        /* Read an ISO-8859-1 encoded character.  */
     286        {
     287          int c = phase1_getc ();
     288  
     289          if (c == EOF)
     290            return UEOF;
     291          return c;
     292        }
     293  
     294      default:
     295        abort ();
     296      }
     297  }
     298  
     299  static void
     300  phase2_ungetc (int c)
     301  {
     302    if (c != UEOF)
     303      phase2_pushback[phase2_pushback_length++] = c;
     304  }
     305  
     306  
     307  /* Phase 3: Read an UCS-4 character, with line number handling.  */
     308  
     309  static int
     310  phase3_getc ()
     311  {
     312    int c = phase2_getc ();
     313  
     314    if (c == '\n')
     315      gram_pos.line_number++;
     316  
     317    return c;
     318  }
     319  
     320  static void
     321  phase3_ungetc (int c)
     322  {
     323    if (c == '\n')
     324      --gram_pos.line_number;
     325    phase2_ungetc (c);
     326  }
     327  
     328  
     329  /* Convert from UCS-4 to UTF-8.  */
     330  static char *
     331  conv_from_ucs4 (const int *buffer, size_t buflen)
     332  {
     333    unsigned char *utf8_string;
     334    size_t pos;
     335    unsigned char *q;
     336  
     337    /* Each UCS-4 word needs 6 bytes at worst.  */
     338    utf8_string = XNMALLOC (6 * buflen + 1, unsigned char);
     339  
     340    for (pos = 0, q = utf8_string; pos < buflen; )
     341      {
     342        unsigned int uc;
     343        int n;
     344  
     345        uc = buffer[pos++];
     346        n = u8_uctomb (q, uc, 6);
     347        assert (n > 0);
     348        q += n;
     349      }
     350    *q = '\0';
     351    assert (q - utf8_string <= 6 * buflen);
     352  
     353    return (char *) utf8_string;
     354  }
     355  
     356  
     357  /* Parse a string enclosed in double-quotes.  Input is UCS-4 encoded.
     358     Return the string in UTF-8 encoding, or NULL if the input doesn't represent
     359     a valid string enclosed in double-quotes.  */
     360  static char *
     361  parse_escaped_string (const int *string, size_t length)
     362  {
     363    static int *buffer;
     364    static size_t bufmax;
     365    static size_t buflen;
     366    const int *string_limit = string + length;
     367    int c;
     368  
     369    if (string == string_limit)
     370      return NULL;
     371    c = *string++;
     372    if (c != '"')
     373      return NULL;
     374    buflen = 0;
     375    for (;;)
     376      {
     377        if (string == string_limit)
     378          return NULL;
     379        c = *string++;
     380        if (c == '"')
     381          break;
     382        if (c == '\\')
     383          {
     384            if (string == string_limit)
     385              return NULL;
     386            c = *string++;
     387            if (c >= '0' && c <= '7')
     388              {
     389                unsigned int n = 0;
     390                int j = 0;
     391                for (;;)
     392                  {
     393                    n = n * 8 + (c - '0');
     394                    if (++j == 3)
     395                      break;
     396                    if (string == string_limit)
     397                      break;
     398                    c = *string;
     399                    if (!(c >= '0' && c <= '7'))
     400                      break;
     401                    string++;
     402                  }
     403                c = n;
     404              }
     405            else if (c == 'u' || c == 'U')
     406              {
     407                unsigned int n = 0;
     408                int j;
     409                for (j = 0; j < 4; j++)
     410                  {
     411                    if (string == string_limit)
     412                      break;
     413                    c = *string;
     414                    if (c >= '0' && c <= '9')
     415                      n = n * 16 + (c - '0');
     416                    else if (c >= 'A' && c <= 'F')
     417                      n = n * 16 + (c - 'A' + 10);
     418                    else if (c >= 'a' && c <= 'f')
     419                      n = n * 16 + (c - 'a' + 10);
     420                    else
     421                      break;
     422                    string++;
     423                  }
     424                c = n;
     425              }
     426            else
     427              switch (c)
     428                {
     429                case 'a': c = '\a'; break;
     430                case 'b': c = '\b'; break;
     431                case 't': c = '\t'; break;
     432                case 'r': c = '\r'; break;
     433                case 'n': c = '\n'; break;
     434                case 'v': c = '\v'; break;
     435                case 'f': c = '\f'; break;
     436                }
     437          }
     438        if (buflen >= bufmax)
     439          {
     440            bufmax = 2 * bufmax + 10;
     441            buffer = xrealloc (buffer, bufmax * sizeof (int));
     442          }
     443        buffer[buflen++] = c;
     444      }
     445  
     446    return conv_from_ucs4 (buffer, buflen);
     447  }
     448  
     449  
     450  /* Accumulating flag comments.  */
     451  
     452  static char *special_comment;
     453  
     454  static inline void
     455  special_comment_reset ()
     456  {
     457    if (special_comment != NULL)
     458      free (special_comment);
     459    special_comment = NULL;
     460  }
     461  
     462  static void
     463  special_comment_add (const char *flag)
     464  {
     465    if (special_comment == NULL)
     466      special_comment = xstrdup (flag);
     467    else
     468      {
     469        size_t total_len = strlen (special_comment) + 2 + strlen (flag) + 1;
     470        special_comment = xrealloc (special_comment, total_len);
     471        strcat (special_comment, ", ");
     472        strcat (special_comment, flag);
     473      }
     474  }
     475  
     476  static inline void
     477  special_comment_finish ()
     478  {
     479    if (special_comment != NULL)
     480      {
     481        po_callback_comment_special (special_comment);
     482        free (special_comment);
     483        special_comment = NULL;
     484      }
     485  }
     486  
     487  
     488  /* Accumulating comments.  */
     489  
     490  static int *buffer;
     491  static size_t bufmax;
     492  static size_t buflen;
     493  static bool next_is_obsolete;
     494  static bool next_is_fuzzy;
     495  static char *fuzzy_msgstr;
     496  static bool expect_fuzzy_msgstr_as_c_comment;
     497  static bool expect_fuzzy_msgstr_as_cxx_comment;
     498  
     499  static inline void
     500  comment_start ()
     501  {
     502    buflen = 0;
     503  }
     504  
     505  static inline void
     506  comment_add (int c)
     507  {
     508    if (buflen >= bufmax)
     509      {
     510        bufmax = 2 * bufmax + 10;
     511        buffer = xrealloc (buffer, bufmax * sizeof (int));
     512      }
     513    buffer[buflen++] = c;
     514  }
     515  
     516  static void
     517  comment_line_end (size_t chars_to_remove, bool test_for_fuzzy_msgstr)
     518  {
     519    char *line;
     520  
     521    buflen -= chars_to_remove;
     522    /* Drop trailing white space, but not EOLs.  */
     523    while (buflen >= 1
     524           && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
     525      --buflen;
     526  
     527    /* At special positions we interpret a comment of the form
     528         = "escaped string"
     529       with an optional trailing semicolon as being the fuzzy msgstr, not a
     530       regular comment.  */
     531    if (test_for_fuzzy_msgstr
     532        && buflen > 2 && buffer[0] == '=' && buffer[1] == ' '
     533        && (fuzzy_msgstr =
     534            parse_escaped_string (buffer + 2,
     535                                  buflen - (buffer[buflen - 1] == ';') - 2)))
     536      return;
     537  
     538    line = conv_from_ucs4 (buffer, buflen);
     539  
     540    if (strcmp (line, "Flag: untranslated") == 0)
     541      {
     542        special_comment_add ("fuzzy");
     543        next_is_fuzzy = true;
     544      }
     545    else if (strcmp (line, "Flag: unmatched") == 0)
     546      next_is_obsolete = true;
     547    else if (strlen (line) >= 6 && memcmp (line, "Flag: ", 6) == 0)
     548      special_comment_add (line + 6);
     549    else if (strlen (line) >= 9 && memcmp (line, "Comment: ", 9) == 0)
     550      /* A comment extracted from the source.  */
     551      po_callback_comment_dot (line + 9);
     552    else
     553      {
     554        char *last_colon;
     555        unsigned long number;
     556        char *endp;
     557  
     558        if (strlen (line) >= 6 && memcmp (line, "File: ", 6) == 0
     559            && (last_colon = strrchr (line + 6, ':')) != NULL
     560            && *(last_colon + 1) != '\0'
     561            && (number = strtoul (last_colon + 1, &endp, 10), *endp == '\0'))
     562          {
     563            /* A "File: <filename>:<number>" type comment.  */
     564            *last_colon = '\0';
     565            po_callback_comment_filepos (line + 6, number);
     566          }
     567        else
     568          po_callback_comment (line);
     569      }
     570  }
     571  
     572  
     573  /* Phase 4: Replace each comment that is not inside a string with a space
     574     character.  */
     575  
     576  static int
     577  phase4_getc ()
     578  {
     579    int c;
     580  
     581    c = phase3_getc ();
     582    if (c != '/')
     583      return c;
     584    c = phase3_getc ();
     585    switch (c)
     586      {
     587      default:
     588        phase3_ungetc (c);
     589        return '/';
     590  
     591      case '*':
     592        /* C style comment.  */
     593        {
     594          bool last_was_star;
     595          size_t trailing_stars;
     596          bool seen_newline;
     597  
     598          comment_start ();
     599          last_was_star = false;
     600          trailing_stars = 0;
     601          seen_newline = false;
     602          /* Drop additional stars at the beginning of the comment.  */
     603          for (;;)
     604            {
     605              c = phase3_getc ();
     606              if (c != '*')
     607                break;
     608              last_was_star = true;
     609            }
     610          phase3_ungetc (c);
     611          for (;;)
     612            {
     613              c = phase3_getc ();
     614              if (c == UEOF)
     615                break;
     616              /* We skip all leading white space, but not EOLs.  */
     617              if (!(buflen == 0 && (c == ' ' || c == '\t')))
     618                comment_add (c);
     619              switch (c)
     620                {
     621                case '\n':
     622                  seen_newline = true;
     623                  comment_line_end (1, false);
     624                  comment_start ();
     625                  last_was_star = false;
     626                  trailing_stars = 0;
     627                  continue;
     628  
     629                case '*':
     630                  last_was_star = true;
     631                  trailing_stars++;
     632                  continue;
     633  
     634                case '/':
     635                  if (last_was_star)
     636                    {
     637                      /* Drop additional stars at the end of the comment.  */
     638                      comment_line_end (trailing_stars + 1,
     639                                        expect_fuzzy_msgstr_as_c_comment
     640                                        && !seen_newline);
     641                      break;
     642                    }
     643                  FALLTHROUGH;
     644  
     645                default:
     646                  last_was_star = false;
     647                  trailing_stars = 0;
     648                  continue;
     649                }
     650              break;
     651            }
     652          return ' ';
     653        }
     654  
     655      case '/':
     656        /* C++ style comment.  */
     657        comment_start ();
     658        for (;;)
     659          {
     660            c = phase3_getc ();
     661            if (c == '\n' || c == UEOF)
     662              break;
     663            /* We skip all leading white space, but not EOLs.  */
     664            if (!(buflen == 0 && (c == ' ' || c == '\t')))
     665              comment_add (c);
     666          }
     667        comment_line_end (0, expect_fuzzy_msgstr_as_cxx_comment);
     668        return '\n';
     669      }
     670  }
     671  
     672  static inline void
     673  phase4_ungetc (int c)
     674  {
     675    phase3_ungetc (c);
     676  }
     677  
     678  
     679  /* Return true if a character is considered as whitespace.  */
     680  static bool
     681  is_whitespace (int c)
     682  {
     683    return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'
     684            || c == '\b');
     685  }
     686  
     687  /* Return true if a character needs quoting, i.e. cannot be used in unquoted
     688     tokens.  */
     689  static bool
     690  is_quotable (int c)
     691  {
     692    if ((c >= '0' && c <= '9')
     693        || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
     694      return false;
     695    switch (c)
     696      {
     697      case '!': case '#': case '$': case '%': case '&': case '*':
     698      case '+': case '-': case '.': case '/': case ':': case '?':
     699      case '@': case '|': case '~': case '_': case '^':
     700        return false;
     701      default:
     702        return true;
     703      }
     704  }
     705  
     706  
     707  /* Read a key or value string.
     708     Return the string in UTF-8 encoding, or NULL if no string is seen.
     709     Return the start position of the string in *pos.  */
     710  static char *
     711  read_string (lex_pos_ty *pos)
     712  {
     713    static int *buffer;
     714    static size_t bufmax;
     715    static size_t buflen;
     716    int c;
     717  
     718    /* Skip whitespace before the string.  */
     719    do
     720      c = phase4_getc ();
     721    while (is_whitespace (c));
     722  
     723    if (c == UEOF)
     724      /* No more string.  */
     725      return NULL;
     726  
     727    *pos = gram_pos;
     728    buflen = 0;
     729    if (c == '"')
     730      {
     731        /* Read a string enclosed in double-quotes.  */
     732        for (;;)
     733          {
     734            c = phase3_getc ();
     735            if (c == UEOF || c == '"')
     736              break;
     737            if (c == '\\')
     738              {
     739                c = phase3_getc ();
     740                if (c == UEOF)
     741                  break;
     742                if (c >= '0' && c <= '7')
     743                  {
     744                    unsigned int n = 0;
     745                    int j = 0;
     746                    for (;;)
     747                      {
     748                        n = n * 8 + (c - '0');
     749                        if (++j == 3)
     750                          break;
     751                        c = phase3_getc ();
     752                        if (!(c >= '0' && c <= '7'))
     753                          {
     754                            phase3_ungetc (c);
     755                            break;
     756                          }
     757                      }
     758                    c = n;
     759                  }
     760                else if (c == 'u' || c == 'U')
     761                  {
     762                    unsigned int n = 0;
     763                    int j;
     764                    for (j = 0; j < 4; j++)
     765                      {
     766                        c = phase3_getc ();
     767                        if (c >= '0' && c <= '9')
     768                          n = n * 16 + (c - '0');
     769                        else if (c >= 'A' && c <= 'F')
     770                          n = n * 16 + (c - 'A' + 10);
     771                        else if (c >= 'a' && c <= 'f')
     772                          n = n * 16 + (c - 'a' + 10);
     773                        else
     774                          {
     775                            phase3_ungetc (c);
     776                            break;
     777                          }
     778                      }
     779                    c = n;
     780                  }
     781                else
     782                  switch (c)
     783                    {
     784                    case 'a': c = '\a'; break;
     785                    case 'b': c = '\b'; break;
     786                    case 't': c = '\t'; break;
     787                    case 'r': c = '\r'; break;
     788                    case 'n': c = '\n'; break;
     789                    case 'v': c = '\v'; break;
     790                    case 'f': c = '\f'; break;
     791                    }
     792              }
     793            if (buflen >= bufmax)
     794              {
     795                bufmax = 2 * bufmax + 10;
     796                buffer = xrealloc (buffer, bufmax * sizeof (int));
     797              }
     798            buffer[buflen++] = c;
     799          }
     800        if (c == UEOF)
     801          po_xerror (PO_SEVERITY_ERROR, NULL,
     802                     real_file_name, gram_pos.line_number, (size_t)(-1), false,
     803                     _("warning: unterminated string"));
     804      }
     805    else
     806      {
     807        /* Read a token outside quotes.  */
     808        if (is_quotable (c))
     809          po_xerror (PO_SEVERITY_ERROR, NULL,
     810                     real_file_name, gram_pos.line_number, (size_t)(-1), false,
     811                     _("warning: syntax error"));
     812        for (; c != UEOF && !is_quotable (c); c = phase4_getc ())
     813          {
     814            if (buflen >= bufmax)
     815              {
     816                bufmax = 2 * bufmax + 10;
     817                buffer = xrealloc (buffer, bufmax * sizeof (int));
     818              }
     819            buffer[buflen++] = c;
     820          }
     821      }
     822  
     823    return conv_from_ucs4 (buffer, buflen);
     824  }
     825  
     826  
     827  /* Read a .strings file from a stream, and dispatch to the various
     828     abstract_catalog_reader_class_ty methods.  */
     829  static void
     830  stringtable_parse (abstract_catalog_reader_ty *pop, FILE *file,
     831                     const char *real_filename, const char *logical_filename)
     832  {
     833    fp = file;
     834    real_file_name = real_filename;
     835    gram_pos.file_name = xstrdup (real_file_name);
     836    gram_pos.line_number = 1;
     837    encoding = enc_undetermined;
     838    expect_fuzzy_msgstr_as_c_comment = false;
     839    expect_fuzzy_msgstr_as_cxx_comment = false;
     840  
     841    for (;;)
     842      {
     843        char *msgid;
     844        lex_pos_ty msgid_pos;
     845        char *msgstr;
     846        lex_pos_ty msgstr_pos;
     847        int c;
     848  
     849        /* Prepare for next msgid/msgstr pair.  */
     850        special_comment_reset ();
     851        next_is_obsolete = false;
     852        next_is_fuzzy = false;
     853        fuzzy_msgstr = NULL;
     854  
     855        /* Read the key and all the comments preceding it.  */
     856        msgid = read_string (&msgid_pos);
     857        if (msgid == NULL)
     858          break;
     859  
     860        special_comment_finish ();
     861  
     862        /* Skip whitespace.  */
     863        do
     864          c = phase4_getc ();
     865        while (is_whitespace (c));
     866  
     867        /* Expect a '=' or ';'.  */
     868        if (c == UEOF)
     869          {
     870            po_xerror (PO_SEVERITY_ERROR, NULL,
     871                       real_file_name, gram_pos.line_number, (size_t)(-1), false,
     872                       _("warning: unterminated key/value pair"));
     873            break;
     874          }
     875        if (c == ';')
     876          {
     877            /* "key"; is an abbreviation for "key"=""; and does not
     878               necessarily designate an untranslated entry.  */
     879            msgstr = xstrdup ("");
     880            msgstr_pos = msgid_pos;
     881            po_callback_message (NULL, msgid, &msgid_pos, NULL,
     882                                 msgstr, strlen (msgstr) + 1, &msgstr_pos,
     883                                 NULL, NULL, NULL,
     884                                 false, next_is_obsolete);
     885          }
     886        else if (c == '=')
     887          {
     888            /* Read the value.  */
     889            msgstr = read_string (&msgstr_pos);
     890            if (msgstr == NULL)
     891              {
     892                po_xerror (PO_SEVERITY_ERROR, NULL,
     893                           real_file_name, gram_pos.line_number, (size_t)(-1),
     894                           false, _("warning: unterminated key/value pair"));
     895                break;
     896              }
     897  
     898            /* Skip whitespace.  But for fuzzy key/value pairs, look for the
     899               tentative msgstr in the form of a C style comment.  */
     900            expect_fuzzy_msgstr_as_c_comment = next_is_fuzzy;
     901            do
     902              {
     903                c = phase4_getc ();
     904                if (fuzzy_msgstr != NULL)
     905                  expect_fuzzy_msgstr_as_c_comment = false;
     906              }
     907            while (is_whitespace (c));
     908            expect_fuzzy_msgstr_as_c_comment = false;
     909  
     910            /* Expect a ';'.  */
     911            if (c == ';')
     912              {
     913                /* But for fuzzy key/value pairs, look for the tentative msgstr
     914                   in the form of a C++ style comment. */
     915                if (fuzzy_msgstr == NULL && next_is_fuzzy)
     916                  {
     917                    do
     918                      c = phase3_getc ();
     919                    while (c == ' ');
     920                    phase3_ungetc (c);
     921  
     922                    expect_fuzzy_msgstr_as_cxx_comment = true;
     923                    c = phase4_getc ();
     924                    phase4_ungetc (c);
     925                    expect_fuzzy_msgstr_as_cxx_comment = false;
     926                  }
     927                if (fuzzy_msgstr != NULL && strcmp (msgstr, msgid) == 0)
     928                  msgstr = fuzzy_msgstr;
     929  
     930                /* A key/value pair.  */
     931                po_callback_message (NULL, msgid, &msgid_pos, NULL,
     932                                     msgstr, strlen (msgstr) + 1, &msgstr_pos,
     933                                     NULL, NULL, NULL,
     934                                     false, next_is_obsolete);
     935              }
     936            else
     937              {
     938                po_xerror (PO_SEVERITY_ERROR, NULL,
     939                           real_file_name, gram_pos.line_number, (size_t)(-1),
     940                           false,
     941                           _("warning: syntax error, expected ';' after string"));
     942                break;
     943              }
     944          }
     945        else
     946          {
     947            po_xerror (PO_SEVERITY_ERROR, NULL,
     948                       real_file_name, gram_pos.line_number, (size_t)(-1), false,
     949                       _("warning: syntax error, expected '=' or ';' after string"));
     950            break;
     951          }
     952      }
     953  
     954    fp = NULL;
     955    real_file_name = NULL;
     956    gram_pos.line_number = 0;
     957  }
     958  
/* Descriptor of the .strings input format: stringtable_parse is its
   parser, and the strings that the parser reports are UTF-8 encoded (they
   all come from conv_from_ucs4).  */
const struct catalog_input_format input_format_stringtable =
{
  stringtable_parse,                    /* parse */
  true                                  /* produces_utf8 */
};