(root)/
gettext-0.22.4/
gettext-tools/
src/
format-python-brace.c
       1  /* Python brace format strings.
       2     Copyright (C) 2004, 2006-2007, 2013-2014, 2016, 2019, 2023 Free Software Foundation, Inc.
       3     Written by Daiki Ueno <ueno@gnu.org>, 2013.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  #include <stdbool.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  
      26  #include "format.h"
      27  #include "c-ctype.h"
      28  #include "xalloc.h"
      29  #include "xvasprintf.h"
      30  #include "format-invalid.h"
      31  #include "gettext.h"
      32  
      33  #define _(str) gettext (str)
      34  
      35  /* Python brace format strings are defined by PEP3101 together with the
      36     'format' method of the string class.
      37     Documentation:
      38       https://peps.python.org/pep-3101/
      39       https://docs.python.org/3/library/string.html#formatstrings
      40     A format string directive here consists of
      41       - an opening brace '{',
      42       - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
      43       - an optional sequence of
      44           - getattr ('.' identifier) or
      45           - getitem ('[' identifier ']')
      46         operators,
      47       - optionally, a ':' and a format specifier, where a format specifier is
      48         - either a format directive of the form '{' ... '}' without a format
      49           specifier, or
      50         - of the form [[fill]align][sign][#][0][minimumwidth][.precision][type]
      51           where
      52             - the fill character is any character,
      53             - the align flag is one of '<', '>', '=', '^',
      54             - the sign is one of '+', '-', ' ',
      55             - the # flag is '#',
      56             - the 0 flag is '0',
      57             - minimumwidth is a non-empty sequence of digits,
      58             - precision is a non-empty sequence of digits,
      59             - type is one of
      60               - 'b', 'c', 'd', 'o', 'x', 'X', 'n' for integers,
      61               - 'e', 'E', 'f', 'F', 'g', 'G', 'n', '%' for floating-point values,
      62       - a closing brace '}'.
      63     Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
      64  */
      65  
/* One argument referenced by a top-level directive.  */
struct named_arg
{
  /* Heap-allocated, NUL-terminated copy of the directive's text, i.e. of
     everything between the opening '{' and the closing '}'.  */
  char *name;
};
      70  
/* Result of parsing one format string.  */
struct spec
{
  /* Number of top-level directives seen; the escape "{{" is not counted.  */
  unsigned int directives;
  /* Number of entries of NAMED that are in use.  */
  unsigned int named_arg_count;
  /* Number of entries NAMED currently has room for.  */
  unsigned int allocated;
  /* Growable array of argument names, or NULL while nothing has been
     recorded.  format_parse sorts it and removes duplicates.  */
  struct named_arg *named;
};
      78  
      79  
/* Forward declaration of local functions.  */
/* Releases the argument names stored in SPEC and the array holding them,
   but not SPEC itself.  */
static void free_named_args (struct spec *spec);
      82  
      83  
      84  /* All the parse_* functions (except parse_upto) follow the same
      85     calling convention.  FORMATP shall point to the beginning of a token.
      86     If parsing succeeds, FORMATP will point to the next character after
      87     the token, and true is returned.  Otherwise, FORMATP will be
      88     unchanged and false is returned.  */
      89  
      90  static bool
      91  parse_named_field (struct spec *spec,
      92                     const char **formatp, bool translated, char *fdi,
      93                     char **invalid_reason)
      94  {
      95    const char *format = *formatp;
      96    char c;
      97  
      98    c = *format;
      99    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_')
     100      {
     101        do
     102          c = *++format;
     103        while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'
     104               || (c >= '0' && c <= '9'));
     105        *formatp = format;
     106        return true;
     107      }
     108    return false;
     109  }
     110  
     111  static bool
     112  parse_numeric_field (struct spec *spec,
     113                       const char **formatp, bool translated, char *fdi,
     114                       char **invalid_reason)
     115  {
     116    const char *format = *formatp;
     117    char c;
     118  
     119    c = *format;
     120    if (c >= '0' && c <= '9')
     121      {
     122        do
     123          c = *++format;
     124        while (c >= '0' && c <= '9');
     125        *formatp = format;
     126        return true;
     127      }
     128    return false;
     129  }
     130  
     131  /* Parses a directive.
     132     When this function is invoked, *formatp points to the start of the directive,
     133     i.e. to the '{' character.
     134     When this function returns true, *formatp points to the first character after
     135     the directive, i.e. in most cases to the character after the '}' character.
     136   */
     137  static bool
     138  parse_directive (struct spec *spec,
     139                   const char **formatp, bool is_toplevel,
     140                   bool translated, char *fdi, char **invalid_reason)
     141  {
     142    const char *format = *formatp;
     143    const char *const format_start = format;
     144    const char *name_start;
     145    char c;
     146  
     147    c = *++format;
     148    if (c == '{')
     149      {
     150        /* An escaped '{'.  */
     151        *formatp = ++format;
     152        return true;
     153      }
     154  
     155    name_start = format;
     156    if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
     157        && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
     158      {
     159        *invalid_reason =
     160          xasprintf (_("In the directive number %u, '%c' cannot start a field name."),
     161                     spec->directives, *format);
     162        FDI_SET (format, FMTDIR_ERROR);
     163        return false;
     164      }
     165  
     166    /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
     167       name.  If must not recurse, but can be specifed in a chain, such
     168       as "foo.bar.baz[0]".  */
     169    for (;;)
     170      {
     171        c = *format;
     172  
     173        if (c == '.')
     174          {
     175            format++;
     176            if (!parse_named_field (spec, &format, translated, fdi,
     177                                    invalid_reason))
     178              {
     179                *invalid_reason =
     180                  xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."),
     181                             spec->directives, *format);
     182                FDI_SET (format, FMTDIR_ERROR);
     183                return false;
     184              }
     185          }
     186        else if (c == '[')
     187          {
     188            format++;
     189            if (!parse_named_field (spec, &format, translated, fdi,
     190                                    invalid_reason)
     191                && !parse_numeric_field (spec, &format, translated, fdi,
     192                                         invalid_reason))
     193              {
     194                *invalid_reason =
     195                  xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."),
     196                             spec->directives, *format);
     197                FDI_SET (format, FMTDIR_ERROR);
     198                return false;
     199              }
     200  
     201            if (*format != ']')
     202              {
     203                *invalid_reason =
     204                  xasprintf (_("In the directive number %u, there is an unterminated getitem argument."),
     205                             spec->directives);
     206                FDI_SET (format, FMTDIR_ERROR);
     207                return false;
     208              }
     209            format++;
     210          }
     211        else
     212          break;
     213      }
     214  
     215    /* Here c == *format.  */
     216    if (c == ':')
     217      {
     218        if (!is_toplevel)
     219          {
     220            *invalid_reason =
     221              xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."),
     222                         spec->directives);
     223            FDI_SET (format, FMTDIR_ERROR);
     224            return false;
     225          }
     226  
     227        format++;
     228  
     229        /* Format specifiers.  Although a format specifier can be any
     230           string in theory, we can only recognize two types of format
     231           specifiers below, because otherwise we would need to evaluate
     232           Python expressions by ourselves:
     233  
     234             - A nested format directive expanding to an argument
     235             - The Standard Format Specifiers, as described in PEP3101,
     236               not including a nested format directive  */
     237        if (*format == '{')
     238          {
     239            /* Nested format directive.  */
     240            if (!parse_directive (spec, &format, false, translated, fdi,
     241                                  invalid_reason))
     242              {
     243                /* FDI and INVALID_REASON will be set by a recursive call of
     244                   parse_directive.  */
     245                return false;
     246              }
     247          }
     248        else
     249          {
     250            /* Standard format specifiers is in the form:
     251               [[fill]align][sign][#][0][minimumwidth][.precision][type]  */
     252  
     253            /* Look ahead two characters to skip [[fill]align].  */
     254            int c1, c2;
     255  
     256            c1 = format[0];
     257            if (c1 == '\0')
     258              {
     259                *invalid_reason =
     260                  xasprintf (_("In the directive number %u, there is an unterminated format directive."),
     261                             spec->directives);
     262                FDI_SET (format, FMTDIR_ERROR);
     263                return false;
     264              }
     265  
     266            c2 = format[1];
     267  
     268            if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
     269              format += 2;
     270            else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
     271              format++;
     272  
     273            if (*format == '+' || *format == '-' || *format == ' ')
     274              format++;
     275            if (*format == '#')
     276              format++;
     277            if (*format == '0')
     278              format++;
     279  
     280            /* Parse the optional minimumwidth.  */
     281            while (c_isdigit (*format))
     282              format++;
     283  
     284            /* Parse the optional .precision.  */
     285            if (*format == '.')
     286              {
     287                format++;
     288                if (c_isdigit (*format))
     289                  do
     290                    format++;
     291                  while (c_isdigit (*format));
     292                else
     293                  format--;
     294              }
     295  
     296            switch (*format)
     297              {
     298              case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
     299              case 'n':
     300              case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
     301              case '%':
     302                format++;
     303                break;
     304              default:
     305                break;
     306              }
     307          }
     308      }
     309  
     310    if (*format != '}')
     311      {
     312        *invalid_reason =
     313          xasprintf (_("In the directive number %u, there is an unterminated format directive."),
     314                     spec->directives);
     315        FDI_SET (format, FMTDIR_ERROR);
     316        return false;
     317      }
     318  
     319    if (is_toplevel)
     320      {
     321        char *name;
     322        size_t n = format - name_start;
     323  
     324        FDI_SET (name_start - 1, FMTDIR_START);
     325  
     326        name = XNMALLOC (n + 1, char);
     327        memcpy (name, name_start, n);
     328        name[n] = '\0';
     329  
     330        spec->directives++;
     331  
     332        if (spec->allocated == spec->named_arg_count)
     333          {
     334            spec->allocated = 2 * spec->allocated + 1;
     335            spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
     336          }
     337        spec->named[spec->named_arg_count].name = name;
     338        spec->named_arg_count++;
     339  
     340        FDI_SET (format, FMTDIR_END);
     341      }
     342  
     343    *formatp = ++format;
     344    return true;
     345  }
     346  
     347  static bool
     348  parse_upto (struct spec *spec,
     349              const char **formatp, bool is_toplevel, char terminator,
     350              bool translated, char *fdi, char **invalid_reason)
     351  {
     352    const char *format = *formatp;
     353  
     354    for (; *format != terminator && *format != '\0';)
     355      {
     356        if (*format == '{')
     357          {
     358            if (!parse_directive (spec, &format, is_toplevel, translated, fdi,
     359                                  invalid_reason))
     360              return false;
     361          }
     362        else
     363          format++;
     364      }
     365  
     366    *formatp = format;
     367    return true;
     368  }
     369  
     370  static int
     371  named_arg_compare (const void *p1, const void *p2)
     372  {
     373    return strcmp (((const struct named_arg *) p1)->name,
     374                   ((const struct named_arg *) p2)->name);
     375  }
     376  
     377  static void *
     378  format_parse (const char *format, bool translated, char *fdi,
     379                char **invalid_reason)
     380  {
     381    struct spec spec;
     382    struct spec *result;
     383  
     384    spec.directives = 0;
     385    spec.named_arg_count = 0;
     386    spec.allocated = 0;
     387    spec.named = NULL;
     388  
     389    if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason))
     390      {
     391        free_named_args (&spec);
     392        return NULL;
     393      }
     394  
     395    /* Sort the named argument array, and eliminate duplicates.  */
     396    if (spec.named_arg_count > 1)
     397      {
     398        unsigned int i, j;
     399  
     400        qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
     401               named_arg_compare);
     402  
     403        /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
     404        for (i = j = 0; i < spec.named_arg_count; i++)
     405          if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
     406            free (spec.named[i].name);
     407          else
     408            {
     409              if (j < i)
     410                spec.named[j].name = spec.named[i].name;
     411              j++;
     412            }
     413        spec.named_arg_count = j;
     414      }
     415  
     416    result = XMALLOC (struct spec);
     417    *result = spec;
     418    return result;
     419  }
     420  
     421  static void
     422  free_named_args (struct spec *spec)
     423  {
     424    if (spec->named != NULL)
     425      {
     426        unsigned int i;
     427        for (i = 0; i < spec->named_arg_count; i++)
     428          free (spec->named[i].name);
     429        free (spec->named);
     430      }
     431  }
     432  
     433  static void
     434  format_free (void *descr)
     435  {
     436    struct spec *spec = (struct spec *) descr;
     437  
     438    free_named_args (spec);
     439    free (spec);
     440  }
     441  
     442  static int
     443  format_get_number_of_directives (void *descr)
     444  {
     445    struct spec *spec = (struct spec *) descr;
     446  
     447    return spec->directives;
     448  }
     449  
     450  static bool
     451  format_check (void *msgid_descr, void *msgstr_descr, bool equality,
     452                formatstring_error_logger_t error_logger,
     453                const char *pretty_msgid, const char *pretty_msgstr)
     454  {
     455    struct spec *spec1 = (struct spec *) msgid_descr;
     456    struct spec *spec2 = (struct spec *) msgstr_descr;
     457    bool err = false;
     458  
     459    if (spec1->named_arg_count + spec2->named_arg_count > 0)
     460      {
     461        unsigned int i, j;
     462        unsigned int n1 = spec1->named_arg_count;
     463        unsigned int n2 = spec2->named_arg_count;
     464  
     465        /* Check the argument names in spec1 are contained in those of spec2.
     466           Both arrays are sorted.  We search for the differences.  */
     467        for (i = 0, j = 0; i < n1 || j < n2; )
     468          {
     469            int cmp = (i >= n1 ? 1 :
     470                       j >= n2 ? -1 :
     471                       strcmp (spec1->named[i].name, spec2->named[j].name));
     472  
     473            if (cmp > 0)
     474              {
     475                if (equality)
     476                  {
     477                    if (error_logger)
     478                      error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
     479                                    spec2->named[i].name, pretty_msgid);
     480                    err = true;
     481                    break;
     482                  }
     483                else
     484                  j++;
     485              }
     486            else if (cmp < 0)
     487              {
     488                if (equality)
     489                  {
     490                    if (error_logger)
     491                      error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
     492                                    spec1->named[i].name, pretty_msgstr);
     493                    err = true;
     494                    break;
     495                  }
     496                else
     497                  i++;
     498              }
     499            else
     500              j++, i++;
     501          }
     502      }
     503  
     504    return err;
     505  }
     506  
     507  
/* Parser description for Python brace format strings.  The initializers are
   positional, i.e. they follow the member order of
   'struct formatstring_parser', which is declared outside this file.  */
struct formatstring_parser formatstring_python_brace =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,  /* No function is provided for the fourth member.  */
  format_check
};
     516  
     517  
     518  #ifdef TEST
     519  
     520  /* Test program: Print the argument list specification returned by
     521     format_parse for strings read from standard input.  */
     522  
     523  #include <stdio.h>
     524  
     525  static void
     526  format_print (void *descr)
     527  {
     528    struct spec *spec = (struct spec *) descr;
     529    unsigned int i;
     530  
     531    if (spec == NULL)
     532      {
     533        printf ("INVALID");
     534        return;
     535      }
     536  
     537    printf ("{");
     538    for (i = 0; i < spec->named_arg_count; i++)
     539      {
     540        if (i > 0)
     541          printf (", ");
     542        printf ("'%s'", spec->named[i].name);
     543      }
     544    printf ("}");
     545  }
     546  
     547  int
     548  main ()
     549  {
     550    for (;;)
     551      {
     552        char *line = NULL;
     553        size_t line_size = 0;
     554        int line_len;
     555        char *invalid_reason;
     556        void *descr;
     557  
     558        line_len = getline (&line, &line_size, stdin);
     559        if (line_len < 0)
     560          break;
     561        if (line_len > 0 && line[line_len - 1] == '\n')
     562          line[--line_len] = '\0';
     563  
     564        invalid_reason = NULL;
     565        descr = format_parse (line, false, NULL, &invalid_reason);
     566  
     567        format_print (descr);
     568        printf ("\n");
     569        if (descr == NULL)
     570          printf ("%s\n", invalid_reason);
     571  
     572        free (invalid_reason);
     573        free (line);
     574      }
     575  
     576    return 0;
     577  }
     578  
     579  /*
     580   * For Emacs M-x compile
     581   * Local Variables:
     582   * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"
     583   * End:
     584   */
     585  
     586  #endif /* TEST */