1  /* Checking of messages in PO files.
       2     Copyright (C) 1995-1998, 2000-2008, 2010-2016, 2019 Free Software Foundation, Inc.
       3     Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  /* Specification.  */
      23  #include "msgl-check.h"
      24  
      25  #include <limits.h>
      26  #include <setjmp.h>
      27  #include <signal.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  #include <stdarg.h>
      32  
      33  #include "c-ctype.h"
      34  #include "xalloc.h"
      35  #include "xvasprintf.h"
      36  #include "po-xerror.h"
      37  #include "format.h"
      38  #include "plural-exp.h"
      39  #include "plural-eval.h"
      40  #include "plural-table.h"
      41  #include "c-strstr.h"
      42  #include "message.h"
      43  #include "quote.h"
      44  #include "sentence.h"
      45  #include "unictype.h"
      46  #include "unistr.h"
      47  #include "gettext.h"
      48  
/* Shorthand used throughout this file: _("text") expands to
   gettext ("text").  */
#define _(str) gettext (str)
      50  
      51  #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
      52  
      53  
      54  /* Evaluates the plural formula for min <= n <= max
      55     and returns the estimated number of times the value j was assumed.  */
      56  static unsigned int
      57  plural_expression_histogram (const struct plural_distribution *self,
      58                               int min, int max, unsigned long j)
      59  {
      60    if (min < 0)
      61      min = 0;
      62    /* Limit the number of evaluations.  Nothing interesting happens beyond
      63       1000.  */
      64    if (max - min > 1000)
      65      max = min + 1000;
      66    if (min <= max)
      67      {
      68        const struct expression *expr = self->expr;
      69        unsigned long n;
      70        unsigned int count;
      71  
      72        /* Protect against arithmetic exceptions.  */
      73        install_sigfpe_handler ();
      74  
      75        count = 0;
      76        for (n = min; n <= max; n++)
      77          {
      78            unsigned long val = plural_eval (expr, n);
      79  
      80            if (val == j)
      81              count++;
      82          }
      83  
      84        /* End of protection against arithmetic exceptions.  */
      85        uninstall_sigfpe_handler ();
      86  
      87        return count;
      88      }
      89    else
      90      return 0;
      91  }
      92  
      93  
/* Check the values returned by plural_eval.
   PLURAL_EXPR is evaluated for every n in 0..1000 and each result is
   compared against NPLURALS_VALUE.
   Signals the errors through po_xerror, attached to HEADER.
   Return the number of errors that were seen (0 or 1: the function returns
   right after the first error).
   If no errors, returns in *DISTRIBUTION information about the plural_eval
   values distribution; on error *DISTRIBUTION is left untouched.
   On success, DISTRIBUTION->often is either NULL or a freshly allocated
   array of NPLURALS_VALUE flags; no code in this function frees it on that
   path.  */
int
check_plural_eval (const struct expression *plural_expr,
                   unsigned long nplurals_value,
                   const message_ty *header,
                   struct plural_distribution *distribution)
{
  /* Do as if the plural formula assumes a value N infinitely often if it
     assumes it at least 5 times.  */
#define OFTEN 5
  /* The pointer itself is volatile-qualified; presumably so that its value
     is reliable in the exception branch below, which is entered through a
     non-local jump back to sigsetjmp.  */
  unsigned char * volatile array;

  /* Allocate a distribution array.  */
  if (nplurals_value <= 100)
    array = XCALLOC (nplurals_value, unsigned char);
  else
    /* nplurals_value is nonsense.  Don't risk an out-of-memory.  */
    array = NULL;

  /* sigfpe_exit is a jump buffer declared outside this function.  The
     direct return from sigsetjmp (value 0) runs the evaluation loop; a
     nonzero return lands in the else branch.  */
  if (sigsetjmp (sigfpe_exit, 1) == 0)
    {
      unsigned long n;

      /* Protect against arithmetic exceptions.  */
      install_sigfpe_handler ();

      for (n = 0; n <= 1000; n++)
        {
          unsigned long val = plural_eval (plural_expr, n);

          /* A result whose signed interpretation is negative is reported
             as a negative value rather than as a huge index.  */
          if ((long) val < 0)
            {
              /* End of protection against arithmetic exceptions.  */
              uninstall_sigfpe_handler ();

              po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false,
                         _("plural expression can produce negative values"));
              free (array);
              return 1;
            }
          else if (val >= nplurals_value)
            {
              char *msg;

              /* End of protection against arithmetic exceptions.  */
              uninstall_sigfpe_handler ();

              msg = xasprintf (_("nplurals = %lu but plural expression can produce values as large as %lu"),
                               nplurals_value, val);
              po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
              free (msg);
              free (array);
              return 1;
            }

          /* Here val < nplurals_value, so the index is in bounds whenever
             array was allocated.  The counter saturates at OFTEN.  */
          if (array != NULL && array[val] < OFTEN)
            array[val]++;
        }

      /* End of protection against arithmetic exceptions.  */
      uninstall_sigfpe_handler ();

      /* Normalize the array[val] statistics: 1 if the value was seen at
         least OFTEN times, 0 otherwise.  */
      if (array != NULL)
        {
          unsigned long val;

          for (val = 0; val < nplurals_value; val++)
            array[val] = (array[val] == OFTEN ? 1 : 0);
        }

      distribution->expr = plural_expr;
      distribution->often = array;
      distribution->often_length = (array != NULL ? nplurals_value : 0);
      distribution->histogram = plural_expression_histogram;

      return 0;
    }
  else
    {
      /* Caught an arithmetic exception.  */
      const char *msg;

      /* End of protection against arithmetic exceptions.  */
      uninstall_sigfpe_handler ();

      /* With USE_SIGINFO the message is selected by sigfpe_code; without
         it the switch disappears and the braces below form a plain block
         that always assigns the generic message.  */
#if USE_SIGINFO
      switch (sigfpe_code)
#endif
        {
#if USE_SIGINFO
# ifdef FPE_INTDIV
        case FPE_INTDIV:
          msg = _("plural expression can produce division by zero");
          break;
# endif
# ifdef FPE_INTOVF
        case FPE_INTOVF:
          msg = _("plural expression can produce integer overflow");
          break;
# endif
        default:
#endif
          msg = _("plural expression can produce arithmetic exceptions, possibly division by zero");
        }

      po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);

      free (array);

      return 1;
    }
#undef OFTEN
}
     212  
     213  
     214  /* Try to help the translator by looking up the right plural formula for her.
     215     Return a freshly allocated multiline help string, or NULL.  */
     216  static char *
     217  plural_help (const char *nullentry)
     218  {
     219    struct plural_table_entry *ptentry = NULL;
     220  
     221    {
     222      const char *language;
     223  
     224      language = c_strstr (nullentry, "Language: ");
     225      if (language != NULL)
     226        {
     227          size_t len;
     228  
     229          language += 10;
     230          len = strcspn (language, " \t\n");
     231          if (len > 0)
     232            {
     233              size_t j;
     234  
     235              for (j = 0; j < plural_table_size; j++)
     236                if (len == strlen (plural_table[j].lang)
     237                    && strncmp (language, plural_table[j].lang, len) == 0)
     238                  {
     239                    ptentry = &plural_table[j];
     240                    break;
     241                  }
     242            }
     243        }
     244    }
     245  
     246    if (ptentry == NULL)
     247      {
     248        const char *language;
     249  
     250        language = c_strstr (nullentry, "Language-Team: ");
     251        if (language != NULL)
     252          {
     253            size_t j;
     254  
     255            language += 15;
     256            for (j = 0; j < plural_table_size; j++)
     257              if (strncmp (language,
     258                           plural_table[j].language,
     259                           strlen (plural_table[j].language)) == 0)
     260                {
     261                  ptentry = &plural_table[j];
     262                  break;
     263                }
     264          }
     265      }
     266  
     267    if (ptentry != NULL)
     268      {
     269        char *helpline1 =
     270          xasprintf (_("Try using the following, valid for %s:"),
     271                     ptentry->language);
     272        char *help =
     273          xasprintf ("%s\n\"Plural-Forms: %s\\n\"\n",
     274                     helpline1, ptentry->value);
     275        free (helpline1);
     276        return help;
     277      }
     278    return NULL;
     279  }
     280  
     281  
     282  /* Perform plural expression checking.
     283     Return the number of errors that were seen.
     284     If no errors, returns in *DISTRIBUTION information about the plural_eval
     285     values distribution.  */
     286  static int
     287  check_plural (message_list_ty *mlp,
     288                int ignore_untranslated_messages,
     289                int ignore_fuzzy_messages,
     290                struct plural_distribution *distributionp)
     291  {
     292    int seen_errors = 0;
     293    const message_ty *has_plural;
     294    unsigned long min_nplurals;
     295    const message_ty *min_pos;
     296    unsigned long max_nplurals;
     297    const message_ty *max_pos;
     298    struct plural_distribution distribution;
     299    size_t j;
     300    message_ty *header;
     301  
     302    /* Determine whether mlp has plural entries.  */
     303    has_plural = NULL;
     304    min_nplurals = ULONG_MAX;
     305    min_pos = NULL;
     306    max_nplurals = 0;
     307    max_pos = NULL;
     308    distribution.expr = NULL;
     309    distribution.often = NULL;
     310    distribution.often_length = 0;
     311    distribution.histogram = NULL;
     312    for (j = 0; j < mlp->nitems; j++)
     313      {
     314        message_ty *mp = mlp->item[j];
     315  
     316        if (!mp->obsolete
     317            && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
     318            && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp)))
     319            && mp->msgid_plural != NULL)
     320          {
     321            const char *p;
     322            const char *p_end;
     323            unsigned long n;
     324  
     325            if (has_plural == NULL)
     326              has_plural = mp;
     327  
     328            n = 0;
     329            for (p = mp->msgstr, p_end = p + mp->msgstr_len;
     330                 p < p_end;
     331                 p += strlen (p) + 1)
     332              n++;
     333            if (min_nplurals > n)
     334              {
     335                min_nplurals = n;
     336                min_pos = mp;
     337              }
     338            if (max_nplurals < n)
     339              {
     340                max_nplurals = n;
     341                max_pos = mp;
     342              }
     343          }
     344      }
     345  
     346    /* Look at the plural entry for this domain.
     347       Cf, function extract_plural_expression.  */
     348    header = message_list_search (mlp, NULL, "");
     349    if (header != NULL && !header->obsolete)
     350      {
     351        const char *nullentry;
     352        const char *plural;
     353        const char *nplurals;
     354  
     355        nullentry = header->msgstr;
     356  
     357        plural = c_strstr (nullentry, "plural=");
     358        nplurals = c_strstr (nullentry, "nplurals=");
     359        if (plural == NULL && has_plural != NULL)
     360          {
     361            const char *msg1 =
     362              _("message catalog has plural form translations");
     363            const char *msg2 =
     364              _("but header entry lacks a \"plural=EXPRESSION\" attribute");
     365            char *help = plural_help (nullentry);
     366  
     367            if (help != NULL)
     368              {
     369                char *msg2ext = xasprintf ("%s\n%s", msg2, help);
     370                po_xerror2 (PO_SEVERITY_ERROR,
     371                            has_plural, NULL, 0, 0, false, msg1,
     372                            header, NULL, 0, 0, true, msg2ext);
     373                free (msg2ext);
     374                free (help);
     375              }
     376            else
     377              po_xerror2 (PO_SEVERITY_ERROR,
     378                          has_plural, NULL, 0, 0, false, msg1,
     379                          header, NULL, 0, 0, false, msg2);
     380  
     381            seen_errors++;
     382          }
     383        if (nplurals == NULL && has_plural != NULL)
     384          {
     385            const char *msg1 =
     386              _("message catalog has plural form translations");
     387            const char *msg2 =
     388              _("but header entry lacks a \"nplurals=INTEGER\" attribute");
     389            char *help = plural_help (nullentry);
     390  
     391            if (help != NULL)
     392              {
     393                char *msg2ext = xasprintf ("%s\n%s", msg2, help);
     394                po_xerror2 (PO_SEVERITY_ERROR,
     395                            has_plural, NULL, 0, 0, false, msg1,
     396                            header, NULL, 0, 0, true, msg2ext);
     397                free (msg2ext);
     398                free (help);
     399              }
     400            else
     401              po_xerror2 (PO_SEVERITY_ERROR,
     402                          has_plural, NULL, 0, 0, false, msg1,
     403                          header, NULL, 0, 0, false, msg2);
     404  
     405            seen_errors++;
     406          }
     407        if (plural != NULL && nplurals != NULL)
     408          {
     409            const char *endp;
     410            unsigned long int nplurals_value;
     411            struct parse_args args;
     412            const struct expression *plural_expr;
     413  
     414            /* First check the number.  */
     415            nplurals += 9;
     416            while (*nplurals != '\0' && c_isspace ((unsigned char) *nplurals))
     417              ++nplurals;
     418            endp = nplurals;
     419            nplurals_value = 0;
     420            if (*nplurals >= '0' && *nplurals <= '9')
     421              nplurals_value = strtoul (nplurals, (char **) &endp, 10);
     422            if (nplurals == endp)
     423              {
     424                const char *msg = _("invalid nplurals value");
     425                char *help = plural_help (nullentry);
     426  
     427                if (help != NULL)
     428                  {
     429                    char *msgext = xasprintf ("%s\n%s", msg, help);
     430                    po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
     431                               msgext);
     432                    free (msgext);
     433                    free (help);
     434                  }
     435                else
     436                  po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
     437  
     438                seen_errors++;
     439              }
     440  
     441            /* Then check the expression.  */
     442            plural += 7;
     443            args.cp = plural;
     444            if (parse_plural_expression (&args) != 0)
     445              {
     446                const char *msg = _("invalid plural expression");
     447                char *help = plural_help (nullentry);
     448  
     449                if (help != NULL)
     450                  {
     451                    char *msgext = xasprintf ("%s\n%s", msg, help);
     452                    po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
     453                               msgext);
     454                    free (msgext);
     455                    free (help);
     456                  }
     457                else
     458                  po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
     459  
     460                seen_errors++;
     461              }
     462            plural_expr = args.res;
     463  
     464            /* See whether nplurals and plural fit together.  */
     465            if (!seen_errors)
     466              seen_errors =
     467                check_plural_eval (plural_expr, nplurals_value, header,
     468                                   &distribution);
     469  
     470            /* Check the number of plurals of the translations.  */
     471            if (!seen_errors)
     472              {
     473                if (min_nplurals < nplurals_value)
     474                  {
     475                    char *msg1 =
     476                      xasprintf (_("nplurals = %lu"), nplurals_value);
     477                    char *msg2 =
     478                      xasprintf (ngettext ("but some messages have only one plural form",
     479                                           "but some messages have only %lu plural forms",
     480                                           min_nplurals),
     481                                 min_nplurals);
     482                    po_xerror2 (PO_SEVERITY_ERROR,
     483                                header, NULL, 0, 0, false, msg1,
     484                                min_pos, NULL, 0, 0, false, msg2);
     485                    free (msg2);
     486                    free (msg1);
     487                    seen_errors++;
     488                  }
     489                else if (max_nplurals > nplurals_value)
     490                  {
     491                    char *msg1 =
     492                      xasprintf (_("nplurals = %lu"), nplurals_value);
     493                    char *msg2 =
     494                      xasprintf (ngettext ("but some messages have one plural form",
     495                                           "but some messages have %lu plural forms",
     496                                           max_nplurals),
     497                                 max_nplurals);
     498                    po_xerror2 (PO_SEVERITY_ERROR,
     499                                header, NULL, 0, 0, false, msg1,
     500                                max_pos, NULL, 0, 0, false, msg2);
     501                    free (msg2);
     502                    free (msg1);
     503                    seen_errors++;
     504                  }
     505                /* The only valid case is max_nplurals <= n <= min_nplurals,
     506                   which means either has_plural == NULL or
     507                   max_nplurals = n = min_nplurals.  */
     508              }
     509          }
     510        else
     511          goto no_plural;
     512      }
     513    else
     514      {
     515        if (has_plural != NULL)
     516          {
     517            po_xerror (PO_SEVERITY_ERROR, has_plural, NULL, 0, 0, false,
     518                       _("message catalog has plural form translations, but lacks a header entry with \"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\""));
     519            seen_errors++;
     520          }
     521       no_plural:
     522        /* By default, the Germanic formula (n != 1) is used.  */
     523        distribution.expr = &germanic_plural;
     524        {
     525          unsigned char *array = XCALLOC (2, unsigned char);
     526          array[1] = 1;
     527          distribution.often = array;
     528        }
     529        distribution.often_length = 2;
     530        distribution.histogram = plural_expression_histogram;
     531      }
     532  
     533    /* distribution is not needed if we report errors.
     534       Also, if there was an error due to  max_nplurals > nplurals_value,
     535       we must not use distribution because we would be doing out-of-bounds
     536       array accesses.  */
     537    if (seen_errors > 0)
     538      free ((unsigned char *) distribution.often);
     539    else
     540      *distributionp = distribution;
     541  
     542    return seen_errors;
     543  }
     544  
     545  
     546  /* Signal an error when checking format strings.  */
     547  static const message_ty *curr_mp;
     548  static lex_pos_ty curr_msgid_pos;
     549  static void
     550  formatstring_error_logger (const char *format, ...)
     551  #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
     552       __attribute__ ((__format__ (__printf__, 1, 2)))
     553  #endif
     554  ;
     555  static void
     556  formatstring_error_logger (const char *format, ...)
     557  {
     558    va_list args;
     559    char *msg;
     560  
     561    va_start (args, format);
     562    if (vasprintf (&msg, format, args) < 0)
     563      error (EXIT_FAILURE, 0, _("memory exhausted"));
     564    va_end (args);
     565    po_xerror (PO_SEVERITY_ERROR,
     566               curr_mp, curr_msgid_pos.file_name, curr_msgid_pos.line_number,
     567               (size_t)(-1), false, msg);
     568    free (msg);
     569  }
     570  
     571  
     572  /* Perform miscellaneous checks on a message.
     573     PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements,
     574     PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed
     575     infinitely often by the plural formula.
     576     PLURAL_DISTRIBUTION_LENGTH is the length of the PLURAL_DISTRIBUTION
     577     array.  */
     578  static int
     579  check_pair (const message_ty *mp,
     580              const char *msgid,
     581              const lex_pos_ty *msgid_pos,
     582              const char *msgid_plural,
     583              const char *msgstr, size_t msgstr_len,
     584              const enum is_format is_format[NFORMATS],
     585              int check_newlines,
     586              int check_format_strings,
     587              const struct plural_distribution *distribution,
     588              int check_compatibility,
     589              int check_accelerators, char accelerator_char)
     590  {
     591    int seen_errors;
     592    int has_newline;
     593    unsigned int j;
     594  
     595    /* If the msgid string is empty we have the special entry reserved for
     596       information about the translation.  */
     597    if (msgid[0] == '\0')
     598      return 0;
     599  
     600    seen_errors = 0;
     601  
     602    if (check_newlines)
     603      {
     604        /* Test 1: check whether all or none of the strings begin with a '\n'.  */
     605        has_newline = (msgid[0] == '\n');
     606  #define TEST_NEWLINE(p) (p[0] == '\n')
     607        if (msgid_plural != NULL)
     608          {
     609            const char *p;
     610  
     611            if (TEST_NEWLINE(msgid_plural) != has_newline)
     612              {
     613                po_xerror (PO_SEVERITY_ERROR,
     614                           mp, msgid_pos->file_name, msgid_pos->line_number,
     615                           (size_t)(-1), false,
     616                           _("'msgid' and 'msgid_plural' entries do not both begin with '\\n'"));
     617                seen_errors++;
     618              }
     619            for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
     620              if (TEST_NEWLINE(p) != has_newline)
     621                {
     622                  char *msg =
     623                    xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both begin with '\\n'"),
     624                               j);
     625                  po_xerror (PO_SEVERITY_ERROR,
     626                             mp, msgid_pos->file_name, msgid_pos->line_number,
     627                             (size_t)(-1), false, msg);
     628                  free (msg);
     629                  seen_errors++;
     630                }
     631          }
     632        else
     633          {
     634            if (TEST_NEWLINE(msgstr) != has_newline)
     635              {
     636                po_xerror (PO_SEVERITY_ERROR,
     637                           mp, msgid_pos->file_name, msgid_pos->line_number,
     638                           (size_t)(-1), false,
     639                           _("'msgid' and 'msgstr' entries do not both begin with '\\n'"));
     640                seen_errors++;
     641              }
     642          }
     643  #undef TEST_NEWLINE
     644  
     645        /* Test 2: check whether all or none of the strings end with a '\n'.  */
     646        has_newline = (msgid[strlen (msgid) - 1] == '\n');
     647  #define TEST_NEWLINE(p) (p[0] != '\0' && p[strlen (p) - 1] == '\n')
     648        if (msgid_plural != NULL)
     649          {
     650            const char *p;
     651  
     652            if (TEST_NEWLINE(msgid_plural) != has_newline)
     653              {
     654                po_xerror (PO_SEVERITY_ERROR,
     655                           mp, msgid_pos->file_name, msgid_pos->line_number,
     656                           (size_t)(-1), false,
     657                           _("'msgid' and 'msgid_plural' entries do not both end with '\\n'"));
     658                seen_errors++;
     659              }
     660            for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
     661              if (TEST_NEWLINE(p) != has_newline)
     662                {
     663                  char *msg =
     664                    xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both end with '\\n'"),
     665                               j);
     666                  po_xerror (PO_SEVERITY_ERROR,
     667                             mp, msgid_pos->file_name, msgid_pos->line_number,
     668                             (size_t)(-1), false, msg);
     669                  free (msg);
     670                  seen_errors++;
     671                }
     672          }
     673        else
     674          {
     675            if (TEST_NEWLINE(msgstr) != has_newline)
     676              {
     677                po_xerror (PO_SEVERITY_ERROR,
     678                           mp, msgid_pos->file_name, msgid_pos->line_number,
     679                           (size_t)(-1), false,
     680                           _("'msgid' and 'msgstr' entries do not both end with '\\n'"));
     681                seen_errors++;
     682              }
     683          }
     684  #undef TEST_NEWLINE
     685      }
     686  
     687    if (check_compatibility && msgid_plural != NULL)
     688      {
     689        po_xerror (PO_SEVERITY_ERROR,
     690                   mp, msgid_pos->file_name, msgid_pos->line_number,
     691                   (size_t)(-1), false,
     692                   _("plural handling is a GNU gettext extension"));
     693        seen_errors++;
     694      }
     695  
     696    if (check_format_strings)
     697      /* Test 3: Check whether both formats strings contain the same number
     698         of format specifications.  */
     699      {
     700        curr_mp = mp;
     701        curr_msgid_pos = *msgid_pos;
     702        seen_errors +=
     703          check_msgid_msgstr_format (msgid, msgid_plural, msgstr, msgstr_len,
     704                                     is_format, mp->range, distribution,
     705                                     formatstring_error_logger);
     706      }
     707  
     708    if (check_accelerators && msgid_plural == NULL)
     709      /* Test 4: Check that if msgid is a menu item with a keyboard accelerator,
     710         the msgstr has an accelerator as well.  A keyboard accelerator is
     711         designated by an immediately preceding '&'.  We cannot check whether
     712         two accelerators collide, only whether the translator has bothered
     713         thinking about them.  */
     714      {
     715        const char *p;
     716  
     717        /* We are only interested in msgids that contain exactly one '&'.  */
     718        p = strchr (msgid, accelerator_char);
     719        if (p != NULL && strchr (p + 1, accelerator_char) == NULL)
     720          {
     721            /* Count the number of '&' in msgstr, but ignore '&&'.  */
     722            unsigned int count = 0;
     723  
     724            for (p = msgstr; (p = strchr (p, accelerator_char)) != NULL; p++)
     725              if (p[1] == accelerator_char)
     726                p++;
     727              else
     728                count++;
     729  
     730            if (count == 0)
     731              {
     732                char *msg =
     733                  xasprintf (_("msgstr lacks the keyboard accelerator mark '%c'"),
     734                             accelerator_char);
     735                po_xerror (PO_SEVERITY_ERROR,
     736                           mp, msgid_pos->file_name, msgid_pos->line_number,
     737                           (size_t)(-1), false, msg);
     738                free (msg);
     739                seen_errors++;
     740              }
     741            else if (count > 1)
     742              {
     743                char *msg =
     744                  xasprintf (_("msgstr has too many keyboard accelerator marks '%c'"),
     745                             accelerator_char);
     746                po_xerror (PO_SEVERITY_ERROR,
     747                           mp, msgid_pos->file_name, msgid_pos->line_number,
     748                           (size_t)(-1), false, msg);
     749                free (msg);
     750                seen_errors++;
     751              }
     752          }
     753      }
     754  
     755    return seen_errors;
     756  }
     757  
     758  
     759  /* Perform miscellaneous checks on a header entry.  */
     760  static int
     761  check_header_entry (const message_ty *mp, const char *msgstr_string)
     762  {
     763    static const char *required_fields[] =
     764    {
     765      "Project-Id-Version", "PO-Revision-Date", "Last-Translator",
     766      "Language-Team", "MIME-Version", "Content-Type",
     767      "Content-Transfer-Encoding",
     768      /* These are recommended but not yet required.  */
     769      "Language"
     770    };
     771    static const char *default_values[] =
     772    {
     773      "PACKAGE VERSION", "YEAR-MO-DA HO:MI+ZONE", "FULL NAME <EMAIL@ADDRESS>", "LANGUAGE <LL@li.org>", NULL,
     774      "text/plain; charset=CHARSET", "ENCODING",
     775      ""
     776    };
     777    const size_t nfields = SIZEOF (required_fields);
     778    /* FIXME: We could check if a required header field is missing and
     779       report it as error.  However, it's could be too rigorous and
     780       break backward compatibility.  */
     781  #if 0
     782    const size_t nrequiredfields = nfields - 1;
     783  #endif
     784    int seen_errors = 0;
     785    int cnt;
     786  
     787    for (cnt = 0; cnt < nfields; ++cnt)
     788      {
     789  #if 0
     790        int severity =
     791          (cnt < nrequiredfields ? PO_SEVERITY_ERROR : PO_SEVERITY_WARNING);
     792  #else
     793        int severity =
     794          PO_SEVERITY_WARNING;
     795  #endif
     796        const char *field = required_fields[cnt];
     797        size_t len = strlen (field);
     798        const char *line;
     799  
     800        for (line = msgstr_string; *line != '\0'; )
     801          {
     802            if (strncmp (line, field, len) == 0 && line[len] == ':')
     803              {
     804                const char *p = line + len + 1;
     805  
     806                /* Test whether the field's value, starting at p, is the default
     807                   value.  */
     808                if (*p == ' ')
     809                  p++;
     810                if (default_values[cnt] != NULL
     811                    && strncmp (p, default_values[cnt],
     812                                strlen (default_values[cnt])) == 0)
     813                  {
     814                    p += strlen (default_values[cnt]);
     815                    if (*p == '\0' || *p == '\n')
     816                      {
     817                        char *msg =
     818                          xasprintf (_("header field '%s' still has the initial default value\n"),
     819                                     field);
     820                        po_xerror (severity, mp, NULL, 0, 0, true, msg);
     821                        free (msg);
     822                        if (severity == PO_SEVERITY_ERROR)
     823                          seen_errors++;
     824                      }
     825                  }
     826                break;
     827              }
     828            line = strchrnul (line, '\n');
     829            if (*line == '\n')
     830              line++;
     831          }
     832        if (*line == '\0')
     833          {
     834            char *msg =
     835              xasprintf (_("header field '%s' missing in header\n"),
     836                         field);
     837            po_xerror (severity, mp, NULL, 0, 0, true, msg);
     838            free (msg);
     839            if (severity == PO_SEVERITY_ERROR)
     840              seen_errors++;
     841          }
     842      }
     843    return seen_errors;
     844  }
     845  
     846  
     847  /* Perform all checks on a non-obsolete message.
     848     Return the number of errors that were seen.  */
     849  int
     850  check_message (const message_ty *mp,
     851                 const lex_pos_ty *msgid_pos,
     852                 int check_newlines,
     853                 int check_format_strings,
     854                 const struct plural_distribution *distribution,
     855                 int check_header,
     856                 int check_compatibility,
     857                 int check_accelerators, char accelerator_char)
     858  {
     859    int seen_errors = 0;
     860  
     861    if (check_header && is_header (mp))
     862      seen_errors += check_header_entry (mp, mp->msgstr);
     863  
     864    seen_errors += check_pair (mp,
     865                               mp->msgid, msgid_pos, mp->msgid_plural,
     866                               mp->msgstr, mp->msgstr_len,
     867                               mp->is_format,
     868                               check_newlines,
     869                               check_format_strings,
     870                               distribution,
     871                               check_compatibility,
     872                               check_accelerators, accelerator_char);
     873    return seen_errors;
     874  }
     875  
     876  
     877  /* Perform all checks on a message list.
     878     Return the number of errors that were seen.  */
     879  int
     880  check_message_list (message_list_ty *mlp,
     881                      int ignore_untranslated_messages,
     882                      int ignore_fuzzy_messages,
     883                      int check_newlines,
     884                      int check_format_strings,
     885                      int check_header,
     886                      int check_compatibility,
     887                      int check_accelerators, char accelerator_char)
     888  {
     889    int seen_errors = 0;
     890    struct plural_distribution distribution;
     891    size_t j;
     892  
     893    distribution.expr = NULL;
     894    distribution.often = NULL;
     895    distribution.often_length = 0;
     896    distribution.histogram = NULL;
     897  
     898    if (check_header)
     899      seen_errors += check_plural (mlp, ignore_untranslated_messages,
     900                                   ignore_fuzzy_messages, &distribution);
     901  
     902    for (j = 0; j < mlp->nitems; j++)
     903      {
     904        message_ty *mp = mlp->item[j];
     905  
     906        if (!mp->obsolete
     907            && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
     908            && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp))))
     909          seen_errors += check_message (mp, &mp->pos,
     910                                        check_newlines,
     911                                        check_format_strings,
     912                                        &distribution,
     913                                        check_header, check_compatibility,
     914                                        check_accelerators, accelerator_char);
     915      }
     916  
     917    return seen_errors;
     918  }
     919  
     920  
     921  static int
     922  syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
     923  {
     924    const char *str = msgid;
     925    const char *str_limit = str + strlen (msgid);
     926    int seen_errors = 0;
     927  
     928    while (str < str_limit)
     929      {
     930        const char *end, *cp;
     931        ucs4_t ending_char;
     932  
     933        end = sentence_end (str, &ending_char);
     934  
     935        /* sentence_end doesn't treat '...' specially.  */
     936        cp = end - (ending_char == '.' ? 2 : 3);
     937        if (cp >= str && memcmp (cp, "...", 3) == 0)
     938          {
     939            po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
     940                       _("ASCII ellipsis ('...') instead of Unicode"));
     941            seen_errors++;
     942          }
     943  
     944        str = end + 1;
     945      }
     946  
     947    return seen_errors;
     948  }
     949  
     950  
     951  static int
     952  syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
     953  {
     954    const char *str = msgid;
     955    const char *str_limit = str + strlen (msgid);
     956    int seen_errors = 0;
     957  
     958    while (str < str_limit)
     959      {
     960        const char *end, *ellipsis = NULL;
     961        ucs4_t ending_char;
     962  
     963        end = sentence_end (str, &ending_char);
     964  
     965        if (ending_char == 0x2026)
     966          ellipsis = end;
     967        else if (ending_char == '.')
     968          {
     969            /* sentence_end doesn't treat '...' specially.  */
     970            const char *cp = end - 2;
     971            if (cp >= str && memcmp (cp, "...", 3) == 0)
     972              ellipsis = cp;
     973          }
     974        else
     975          {
     976            /* Look for a '...'.  */
     977            const char *cp = end - 3;
     978            if (cp >= str && memcmp (cp, "...", 3) == 0)
     979              ellipsis = cp;
     980            else
     981              {
     982                ucs4_t uc = 0xfffd;
     983  
     984                /* Look for a U+2026.  */
     985                for (cp = end - 1; cp >= str; cp--)
     986                  {
     987                    u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
     988                    if (uc != 0xfffd)
     989                      break;
     990                  }
     991  
     992                if (uc == 0x2026)
     993                  ellipsis = cp;
     994              }
     995          }
     996  
     997        if (ellipsis)
     998          {
     999            const char *cp;
    1000            ucs4_t uc = 0xfffd;
    1001  
    1002            /* Look at the character before ellipsis.  */
    1003            for (cp = ellipsis - 1; cp >= str; cp--)
    1004              {
    1005                u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
    1006                if (uc != 0xfffd)
    1007                  break;
    1008              }
    1009  
    1010            if (uc != 0xfffd && uc_is_space (uc))
    1011              {
    1012                po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
    1013                           _("space before ellipsis found in user visible strings"));
    1014                seen_errors++;
    1015              }
    1016          }
    1017  
    1018        str = end + 1;
    1019      }
    1020  
    1021    return seen_errors;
    1022  }
    1023  
    1024  
    1025  struct callback_arg
    1026  {
    1027    const message_ty *mp;
    1028    int seen_errors;
    1029  };
    1030  
    1031  static void
    1032  syntax_check_quote_unicode_callback (char quote, const char *quoted,
    1033                                       size_t quoted_length, void *data)
    1034  {
    1035    struct callback_arg *arg = data;
    1036  
    1037    switch (quote)
    1038      {
    1039      case '"':
    1040        po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
    1041                   _("ASCII double quote used instead of Unicode"));
    1042        arg->seen_errors++;
    1043        break;
    1044  
    1045      case '\'':
    1046        po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
    1047                   _("ASCII single quote used instead of Unicode"));
    1048        arg->seen_errors++;
    1049        break;
    1050  
    1051      default:
    1052        break;
    1053      }
    1054  }
    1055  
    1056  static int
    1057  syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
    1058  {
    1059    struct callback_arg arg;
    1060  
    1061    arg.mp = mp;
    1062    arg.seen_errors = 0;
    1063  
    1064    scan_quoted (msgid, strlen (msgid),
    1065                 syntax_check_quote_unicode_callback, &arg);
    1066  
    1067    return arg.seen_errors;
    1068  }
    1069  
    1070  struct bullet_ty
    1071  {
    1072    int c;
    1073    size_t depth;
    1074  };
    1075  
    1076  struct bullet_stack_ty
    1077  {
    1078    struct bullet_ty *items;
    1079    size_t nitems;
    1080    size_t nitems_max;
    1081  };
    1082  
    1083  static struct bullet_stack_ty bullet_stack;
    1084  
    1085  static int
    1086  syntax_check_bullet_unicode (const message_ty *mp, const char *msgid)
    1087  {
    1088    const char *str = msgid;
    1089    const char *str_limit = str + strlen (msgid);
    1090    struct bullet_ty *last_bullet = NULL;
    1091    bool seen_error = false;
    1092  
    1093    bullet_stack.nitems = 0;
    1094  
    1095    while (str < str_limit)
    1096      {
    1097        const char *p = str, *end;
    1098  
    1099        while (p < str_limit && c_isspace (*p))
    1100          p++;
    1101  
    1102        if ((*p == '*' || *p == '-') && *(p + 1) == ' ')
    1103          {
    1104            size_t depth = p - str;
    1105            if (last_bullet == NULL || depth > last_bullet->depth)
    1106              {
    1107                struct bullet_ty bullet;
    1108  
    1109                bullet.c = *p;
    1110                bullet.depth = depth;
    1111  
    1112                if (bullet_stack.nitems >= bullet_stack.nitems_max)
    1113                  {
    1114                    bullet_stack.nitems_max = 2 * bullet_stack.nitems_max + 4;
    1115                    bullet_stack.items = xrealloc (bullet_stack.items,
    1116                                                   bullet_stack.nitems_max
    1117                                                   * sizeof (struct bullet_ty));
    1118                  }
    1119  
    1120                last_bullet = &bullet_stack.items[bullet_stack.nitems++];
    1121                memcpy (last_bullet, &bullet, sizeof (struct bullet_ty));
    1122              }
    1123            else
    1124              {
    1125                if (depth < last_bullet->depth)
    1126                  {
    1127                    if (bullet_stack.nitems > 1)
    1128                      {
    1129                        bullet_stack.nitems--;
    1130                        last_bullet =
    1131                          &bullet_stack.items[bullet_stack.nitems - 1];
    1132                      }
    1133                    else
    1134                      last_bullet = NULL;
    1135                  }
    1136  
    1137                if (last_bullet && depth == last_bullet->depth)
    1138                  {
    1139                    if (last_bullet->c != *p)
    1140                      last_bullet->c = *p;
    1141                    else
    1142                      {
    1143                        seen_error = true;
    1144                        break;
    1145                      }
    1146                  }
    1147              }
    1148          }
    1149        else
    1150          {
    1151            bullet_stack.nitems = 0;
    1152            last_bullet = NULL;
    1153          }
    1154  
    1155        end = strchrnul (str, '\n');
    1156        str = end + 1;
    1157      }
    1158  
    1159    if (seen_error)
    1160      {
    1161        char *msg;
    1162        msg = xasprintf (_("ASCII bullet ('%c') instead of Unicode"),
    1163                         last_bullet->c);
    1164        po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, msg);
    1165        free (msg);
    1166        return 1;
    1167      }
    1168  
    1169    return 0;
    1170  }
    1171  
    1172  
    1173  typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
    1174  static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
    1175  {
    1176    syntax_check_ellipsis_unicode,
    1177    syntax_check_space_ellipsis,
    1178    syntax_check_quote_unicode,
    1179    syntax_check_bullet_unicode
    1180  };
    1181  
    1182  /* Perform all syntax checks on a non-obsolete message.
    1183     Return the number of errors that were seen.  */
    1184  static int
    1185  syntax_check_message (const message_ty *mp)
    1186  {
    1187    int seen_errors = 0;
    1188    int i;
    1189  
    1190    for (i = 0; i < NSYNTAXCHECKS; i++)
    1191      {
    1192        if (mp->do_syntax_check[i] == yes)
    1193          {
    1194            seen_errors += sc_funcs[i] (mp, mp->msgid);
    1195            if (mp->msgid_plural)
    1196              seen_errors += sc_funcs[i] (mp, mp->msgid_plural);
    1197          }
    1198      }
    1199  
    1200    return seen_errors;
    1201  }
    1202  
    1203  
    1204  /* Perform all syntax checks on a message list.
    1205     Return the number of errors that were seen.  */
    1206  int
    1207  syntax_check_message_list (message_list_ty *mlp)
    1208  {
    1209    int seen_errors = 0;
    1210    size_t j;
    1211  
    1212    for (j = 0; j < mlp->nitems; j++)
    1213      {
    1214        message_ty *mp = mlp->item[j];
    1215  
    1216        if (!is_header (mp))
    1217          seen_errors += syntax_check_message (mp);
    1218      }
    1219  
    1220    return seen_errors;
    1221  }