(root)/
gettext-0.22.4/
gettext-tools/
src/
format-php.c
       1  /* PHP format strings.
       2     Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020, 2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2002.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  #include <stdbool.h>
      23  #include <stdlib.h>
      24  
      25  #include "format.h"
      26  #include "c-ctype.h"
      27  #include "xalloc.h"
      28  #include "xvasprintf.h"
      29  #include "format-invalid.h"
      30  #include "gettext.h"
      31  
      32  #define _(str) gettext (str)
      33  
      34  /* PHP format strings are described in phpdoc-4.0.6, file
      35     phpdoc/manual/function.sprintf.html, and are implemented in
      36     php-4.1.0/ext/standard/formatted_print.c.
      37     A directive
      38     - starts with '%' or '%m$' where m is a positive integer,
      39     - is optionally followed by any of the characters '0', '-', ' ', or
      40       "'<anychar>", each of which acts as a flag,
      41     - is optionally followed by a width specification: a nonempty digit
      42       sequence,
      43     - is optionally followed by '.' and a precision specification: a nonempty
      44       digit sequence,
      45     - is optionally followed by a size specifier 'l', which is ignored,
      46     - is finished by a specifier
      47         - 's', that needs a string argument,
      48         - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
      49         - 'e', 'f', that need a floating-point argument,
      50         - 'c', that needs a character argument.
      51     Additionally there is the directive '%%', which takes no argument.
      52     Numbered and unnumbered argument specifications can be used in the same
      53     string.  Numbered argument specifications have no influence on the
      54     "current argument index", which only unnumbered specifications increment.
      55   */
      56  
      57  enum format_arg_type
      58  {
      59    FAT_INTEGER,
      60    FAT_FLOAT,
      61    FAT_CHARACTER,
      62    FAT_STRING
      63  };
      64  
      65  struct numbered_arg
      66  {
      67    unsigned int number;
      68    enum format_arg_type type;
      69  };
      70  
      71  struct spec
      72  {
      73    unsigned int directives;
      74    unsigned int numbered_arg_count;
      75    struct numbered_arg *numbered;
      76  };
      77  
      78  /* Locale independent test for a decimal digit.
      79     Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
      80     <ctype.h> isdigit must be an 'unsigned char'.)  */
      81  #undef isdigit
      82  #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
      83  
      84  
      85  static int
      86  numbered_arg_compare (const void *p1, const void *p2)
      87  {
      88    unsigned int n1 = ((const struct numbered_arg *) p1)->number;
      89    unsigned int n2 = ((const struct numbered_arg *) p2)->number;
      90  
      91    return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
      92  }
      93  
      94  static void *
      95  format_parse (const char *format, bool translated, char *fdi,
      96                char **invalid_reason)
      97  {
      98    const char *const format_start = format;
      99    unsigned int directives;
     100    unsigned int numbered_arg_count;
     101    struct numbered_arg *numbered;
     102    unsigned int numbered_allocated;
     103    unsigned int unnumbered_arg_count;
     104    struct spec *result;
     105  
     106    directives = 0;
     107    numbered_arg_count = 0;
     108    numbered = NULL;
     109    numbered_allocated = 0;
     110    unnumbered_arg_count = 0;
     111  
     112    for (; *format != '\0';)
     113      if (*format++ == '%')
     114        {
     115          /* A directive.  */
     116          FDI_SET (format - 1, FMTDIR_START);
     117          directives++;
     118  
     119          if (*format != '%')
     120            {
     121              /* A complex directive.  */
     122              unsigned int number;
     123              enum format_arg_type type;
     124  
     125              number = ++unnumbered_arg_count;
     126              if (isdigit (*format))
     127                {
     128                  const char *f = format;
     129                  unsigned int m = 0;
     130  
     131                  do
     132                    {
     133                      m = 10 * m + (*f - '0');
     134                      f++;
     135                    }
     136                  while (isdigit (*f));
     137  
     138                  if (*f == '$')
     139                    {
     140                      if (m == 0)
     141                        {
     142                          *invalid_reason = INVALID_ARGNO_0 (directives);
     143                          FDI_SET (f, FMTDIR_ERROR);
     144                          goto bad_format;
     145                        }
     146                      number = m;
     147                      format = ++f;
     148                      --unnumbered_arg_count;
     149                    }
     150                }
     151  
     152              /* Parse flags.  */
     153              for (;;)
     154                {
     155                  if (*format == '0' || *format == '-' || *format == ' ')
     156                    format++;
     157                  else if (*format == '\'')
     158                    {
     159                      format++;
     160                      if (*format == '\0')
     161                        {
     162                          *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     163                          FDI_SET (format - 1, FMTDIR_ERROR);
     164                          goto bad_format;
     165                        }
     166                      format++;
     167                    }
     168                  else
     169                    break;
     170                }
     171  
     172              /* Parse width.  */
     173              if (isdigit (*format))
     174                {
     175                  do
     176                    format++;
     177                  while (isdigit (*format));
     178                }
     179  
     180              /* Parse precision.  */
     181              if (*format == '.')
     182                {
     183                  format++;
     184  
     185                  if (isdigit (*format))
     186                    {
     187                      do
     188                        format++;
     189                      while (isdigit (*format));
     190                    }
     191                  else
     192                    --format;     /* will jump to bad_format */
     193                }
     194  
     195              /* Parse size.  */
     196              if (*format == 'l')
     197                format++;
     198  
     199              switch (*format)
     200                {
     201                case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
     202                  type = FAT_INTEGER;
     203                  break;
     204                case 'e': case 'f':
     205                  type = FAT_FLOAT;
     206                  break;
     207                case 'c':
     208                  type = FAT_CHARACTER;
     209                  break;
     210                case 's':
     211                  type = FAT_STRING;
     212                  break;
     213                default:
     214                  if (*format == '\0')
     215                    {
     216                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     217                      FDI_SET (format - 1, FMTDIR_ERROR);
     218                    }
     219                  else
     220                    {
     221                      *invalid_reason =
     222                        INVALID_CONVERSION_SPECIFIER (directives, *format);
     223                      FDI_SET (format, FMTDIR_ERROR);
     224                    }
     225                  goto bad_format;
     226                }
     227  
     228              if (numbered_allocated == numbered_arg_count)
     229                {
     230                  numbered_allocated = 2 * numbered_allocated + 1;
     231                  numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
     232                }
     233              numbered[numbered_arg_count].number = number;
     234              numbered[numbered_arg_count].type = type;
     235              numbered_arg_count++;
     236            }
     237  
     238          FDI_SET (format, FMTDIR_END);
     239  
     240          format++;
     241        }
     242  
     243    /* Sort the numbered argument array, and eliminate duplicates.  */
     244    if (numbered_arg_count > 1)
     245      {
     246        unsigned int i, j;
     247        bool err;
     248  
     249        qsort (numbered, numbered_arg_count,
     250               sizeof (struct numbered_arg), numbered_arg_compare);
     251  
     252        /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
     253        err = false;
     254        for (i = j = 0; i < numbered_arg_count; i++)
     255          if (j > 0 && numbered[i].number == numbered[j-1].number)
     256            {
     257              enum format_arg_type type1 = numbered[i].type;
     258              enum format_arg_type type2 = numbered[j-1].type;
     259              enum format_arg_type type_both;
     260  
     261              if (type1 == type2)
     262                type_both = type1;
     263              else
     264                {
     265                  /* Incompatible types.  */
     266                  type_both = type1;
     267                  if (!err)
     268                    *invalid_reason =
     269                      INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
     270                  err = true;
     271                }
     272  
     273              numbered[j-1].type = type_both;
     274            }
     275          else
     276            {
     277              if (j < i)
     278                {
     279                  numbered[j].number = numbered[i].number;
     280                  numbered[j].type = numbered[i].type;
     281                }
     282              j++;
     283            }
     284        numbered_arg_count = j;
     285        if (err)
     286          /* *invalid_reason has already been set above.  */
     287          goto bad_format;
     288      }
     289  
     290    result = XMALLOC (struct spec);
     291    result->directives = directives;
     292    result->numbered_arg_count = numbered_arg_count;
     293    result->numbered = numbered;
     294    return result;
     295  
     296   bad_format:
     297    if (numbered != NULL)
     298      free (numbered);
     299    return NULL;
     300  }
     301  
     302  static void
     303  format_free (void *descr)
     304  {
     305    struct spec *spec = (struct spec *) descr;
     306  
     307    if (spec->numbered != NULL)
     308      free (spec->numbered);
     309    free (spec);
     310  }
     311  
     312  static int
     313  format_get_number_of_directives (void *descr)
     314  {
     315    struct spec *spec = (struct spec *) descr;
     316  
     317    return spec->directives;
     318  }
     319  
     320  static bool
     321  format_check (void *msgid_descr, void *msgstr_descr, bool equality,
     322                formatstring_error_logger_t error_logger,
     323                const char *pretty_msgid, const char *pretty_msgstr)
     324  {
     325    struct spec *spec1 = (struct spec *) msgid_descr;
     326    struct spec *spec2 = (struct spec *) msgstr_descr;
     327    bool err = false;
     328  
     329    if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
     330      {
     331        unsigned int i, j;
     332        unsigned int n1 = spec1->numbered_arg_count;
     333        unsigned int n2 = spec2->numbered_arg_count;
     334  
     335        /* Check that the argument numbers are the same.
     336           Both arrays are sorted.  We search for the first difference.  */
     337        for (i = 0, j = 0; i < n1 || j < n2; )
     338          {
     339            int cmp = (i >= n1 ? 1 :
     340                       j >= n2 ? -1 :
     341                       spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
     342                       spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
     343                       0);
     344  
     345            if (cmp > 0)
     346              {
     347                if (error_logger)
     348                  error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
     349                                spec2->numbered[j].number, pretty_msgstr,
     350                                pretty_msgid);
     351                err = true;
     352                break;
     353              }
     354            else if (cmp < 0)
     355              {
     356                if (equality)
     357                  {
     358                    if (error_logger)
     359                      error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
     360                                    spec1->numbered[i].number, pretty_msgstr);
     361                    err = true;
     362                    break;
     363                  }
     364                else
     365                  i++;
     366              }
     367            else
     368              j++, i++;
     369          }
     370        /* Check the argument types are the same.  */
     371        if (!err)
     372          for (i = 0, j = 0; j < n2; )
     373            {
     374              if (spec1->numbered[i].number == spec2->numbered[j].number)
     375                {
     376                  if (spec1->numbered[i].type != spec2->numbered[j].type)
     377                    {
     378                      if (error_logger)
     379                        error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
     380                                      pretty_msgid, pretty_msgstr,
     381                                      spec2->numbered[j].number);
     382                      err = true;
     383                      break;
     384                    }
     385                  j++, i++;
     386                }
     387              else
     388                i++;
     389            }
     390      }
     391  
     392    return err;
     393  }
     394  
     395  
     396  struct formatstring_parser formatstring_php =
     397  {
     398    format_parse,
     399    format_free,
     400    format_get_number_of_directives,
     401    NULL,
     402    format_check
     403  };
     404  
     405  
     406  #ifdef TEST
     407  
     408  /* Test program: Print the argument list specification returned by
     409     format_parse for strings read from standard input.  */
     410  
     411  #include <stdio.h>
     412  
     413  static void
     414  format_print (void *descr)
     415  {
     416    struct spec *spec = (struct spec *) descr;
     417    unsigned int last;
     418    unsigned int i;
     419  
     420    if (spec == NULL)
     421      {
     422        printf ("INVALID");
     423        return;
     424      }
     425  
     426    printf ("(");
     427    last = 1;
     428    for (i = 0; i < spec->numbered_arg_count; i++)
     429      {
     430        unsigned int number = spec->numbered[i].number;
     431  
     432        if (i > 0)
     433          printf (" ");
     434        if (number < last)
     435          abort ();
     436        for (; last < number; last++)
     437          printf ("_ ");
     438        switch (spec->numbered[i].type)
     439          {
     440          case FAT_INTEGER:
     441            printf ("i");
     442            break;
     443          case FAT_FLOAT:
     444            printf ("f");
     445            break;
     446          case FAT_CHARACTER:
     447            printf ("c");
     448            break;
     449          case FAT_STRING:
     450            printf ("s");
     451            break;
     452          default:
     453            abort ();
     454          }
     455        last = number + 1;
     456      }
     457    printf (")");
     458  }
     459  
     460  int
     461  main ()
     462  {
     463    for (;;)
     464      {
     465        char *line = NULL;
     466        size_t line_size = 0;
     467        int line_len;
     468        char *invalid_reason;
     469        void *descr;
     470  
     471        line_len = getline (&line, &line_size, stdin);
     472        if (line_len < 0)
     473          break;
     474        if (line_len > 0 && line[line_len - 1] == '\n')
     475          line[--line_len] = '\0';
     476  
     477        invalid_reason = NULL;
     478        descr = format_parse (line, false, NULL, &invalid_reason);
     479  
     480        format_print (descr);
     481        printf ("\n");
     482        if (descr == NULL)
     483          printf ("%s\n", invalid_reason);
     484  
     485        free (invalid_reason);
     486        free (line);
     487      }
     488  
     489    return 0;
     490  }
     491  
     492  /*
     493   * For Emacs M-x compile
     494   * Local Variables:
     495   * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-php.c ../gnulib-lib/libgettextlib.la"
     496   * End:
     497   */
     498  
     499  #endif /* TEST */