(root)/
gettext-0.22.4/
gettext-tools/
src/
format-python.c
       1  /* Python format strings.
       2     Copyright (C) 2001-2004, 2006-2009, 2019-2020, 2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  #include <stdbool.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  
      26  #include "format.h"
      27  #include "c-ctype.h"
      28  #include "xalloc.h"
      29  #include "xvasprintf.h"
      30  #include "format-invalid.h"
      31  #include "gettext.h"
      32  
      33  #define _(str) gettext (str)
      34  
      35  /* Python format strings are described in
      36       Python Library reference
      37       2. Built-in Types, Exceptions and Functions
      38       2.1. Built-in Types
      39       2.1.5. Sequence Types
      40       2.1.5.2. String Formatting Operations
      41     Any string or Unicode string can act as format string via the '%' operator,
      42     implemented in stringobject.c and unicodeobject.c.
      43     A directive
      44     - starts with '%'
      45     - is optionally followed by '(ident)' where ident is any sequence of
      46       characters with balanced left and right parentheses,
      47     - is optionally followed by any of the characters '-' (left justification),
      48       '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
      49       flag,
      50     - is optionally followed by a width specification: '*' (reads an argument)
      51       or a nonempty digit sequence,
      52     - is optionally followed by '.' and a precision specification: '*' (reads
      53       an argument) or a nonempty digit sequence,
      54     - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
      55     - is finished by a specifier
      56         - '%', that needs no argument,
      57         - 'c', that needs a character argument,
      58         - 's', 'r', that need a string argument (or, when a precision of 0 is
      59           given, an argument of any type),
      60         - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
      61         - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
      62     Use of '(ident)' and use of unnamed argument specifications are exclusive,
      63     because the first requires a mapping as argument, while the second requires
      64     a tuple as argument. When unnamed arguments are used, the number of
      65     arguments in the format string and the number of elements in the argument
      66     tuple (to the right of the '%' operator) must be the same.
      67   */
      68  
/* Kind of argument that a directive consumes, as assigned by format_parse
   from the directive's conversion specifier.  */
enum format_arg_type
{
  FAT_NONE,       /* '%' specifier; also stored for a name used with incompatible types */
  FAT_ANY,        /* 's' or 'r' with an explicit precision consisting only of zeros */
  FAT_CHARACTER,  /* 'c' */
  FAT_STRING,     /* 's' or 'r' otherwise */
  FAT_INTEGER,    /* 'i' 'd' 'u' 'o' 'x' 'X', and every '*' width or precision */
  FAT_FLOAT       /* 'e' 'E' 'f' 'g' 'G' */
};
      78  
/* One argument referenced through a '%(name)' directive.  */
struct named_arg
{
  char *name;                   /* heap-allocated, NUL-terminated; released by format_free */
  enum format_arg_type type;    /* type required by the directive(s) using this name */
};
      84  
/* One positional argument, i.e. one element of the argument tuple.  */
struct unnamed_arg
{
  enum format_arg_type type;    /* type required at this position */
};
      89  
/* Result of parsing one format string.  format_parse never lets both
   named_arg_count and unnamed_arg_count become nonzero.  */
struct spec
{
  unsigned int directives;          /* number of '%' directives seen, '%%' included */
  unsigned int named_arg_count;     /* number of valid elements in 'named' */
  unsigned int unnamed_arg_count;   /* number of valid elements in 'unnamed' */
  struct named_arg *named;          /* sorted by name, duplicates merged; NULL if none */
  struct unnamed_arg *unnamed;      /* in order of appearance; NULL if none */
};
      98  
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The subtraction result is converted to 'unsigned int', so any value below
   '0' wraps around to a large number and fails the '< 10' test as well.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
     104  
     105  
     106  static int
     107  named_arg_compare (const void *p1, const void *p2)
     108  {
     109    return strcmp (((const struct named_arg *) p1)->name,
     110                   ((const struct named_arg *) p2)->name);
     111  }
     112  
/* Expands to a freshly allocated (xstrdup) copy of the translated error
   message used when a format string combines '%(name)' directives with
   positional ones.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
     115  
     116  static void *
     117  format_parse (const char *format, bool translated, char *fdi,
     118                char **invalid_reason)
     119  {
     120    const char *const format_start = format;
     121    struct spec spec;
     122    unsigned int allocated;
     123    struct spec *result;
     124  
     125    spec.directives = 0;
     126    spec.named_arg_count = 0;
     127    spec.unnamed_arg_count = 0;
     128    spec.named = NULL;
     129    spec.unnamed = NULL;
     130    allocated = 0;
     131  
     132    for (; *format != '\0';)
     133      if (*format++ == '%')
     134        {
     135          /* A directive.  */
     136          char *name = NULL;
     137          bool zero_precision = false;
     138          enum format_arg_type type;
     139  
     140          FDI_SET (format - 1, FMTDIR_START);
     141          spec.directives++;
     142  
     143          if (*format == '(')
     144            {
     145              unsigned int depth;
     146              const char *name_start;
     147              const char *name_end;
     148              size_t n;
     149  
     150              name_start = ++format;
     151              depth = 0;
     152              for (; *format != '\0'; format++)
     153                {
     154                  if (*format == '(')
     155                    depth++;
     156                  else if (*format == ')')
     157                    {
     158                      if (depth == 0)
     159                        break;
     160                      else
     161                        depth--;
     162                    }
     163                }
     164              if (*format == '\0')
     165                {
     166                  *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     167                  FDI_SET (format - 1, FMTDIR_ERROR);
     168                  goto bad_format;
     169                }
     170              name_end = format++;
     171  
     172              n = name_end - name_start;
     173              name = XNMALLOC (n + 1, char);
     174              memcpy (name, name_start, n);
     175              name[n] = '\0';
     176            }
     177  
     178          while (*format == '-' || *format == '+' || *format == ' '
     179                 || *format == '#' || *format == '0')
     180            format++;
     181  
     182          if (*format == '*')
     183            {
     184              format++;
     185  
     186              /* Named and unnamed specifications are exclusive.  */
     187              if (spec.named_arg_count > 0)
     188                {
     189                  *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
     190                  FDI_SET (format - 1, FMTDIR_ERROR);
     191                  goto bad_format;
     192                }
     193  
     194              if (allocated == spec.unnamed_arg_count)
     195                {
     196                  allocated = 2 * allocated + 1;
     197                  spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
     198                }
     199              spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
     200              spec.unnamed_arg_count++;
     201            }
     202          else if (isdigit (*format))
     203            {
     204              do format++; while (isdigit (*format));
     205            }
     206  
     207          if (*format == '.')
     208            {
     209              format++;
     210  
     211              if (*format == '*')
     212                {
     213                  format++;
     214  
     215                  /* Named and unnamed specifications are exclusive.  */
     216                  if (spec.named_arg_count > 0)
     217                    {
     218                      *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
     219                      FDI_SET (format - 1, FMTDIR_ERROR);
     220                      goto bad_format;
     221                    }
     222  
     223                  if (allocated == spec.unnamed_arg_count)
     224                    {
     225                      allocated = 2 * allocated + 1;
     226                      spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
     227                    }
     228                  spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
     229                  spec.unnamed_arg_count++;
     230                }
     231              else if (isdigit (*format))
     232                {
     233                  zero_precision = true;
     234                  do
     235                    {
     236                      if (*format != '0')
     237                        zero_precision = false;
     238                      format++;
     239                    }
     240                  while (isdigit (*format));
     241                }
     242            }
     243  
     244          if (*format == 'h' || *format == 'l' || *format == 'L')
     245            format++;
     246  
     247          switch (*format)
     248            {
     249            case '%':
     250              type = FAT_NONE;
     251              break;
     252            case 'c':
     253              type = FAT_CHARACTER;
     254              break;
     255            case 's': case 'r':
     256              type = (zero_precision ? FAT_ANY : FAT_STRING);
     257              break;
     258            case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
     259              type = FAT_INTEGER;
     260              break;
     261            case 'e': case 'E': case 'f': case 'g': case 'G':
     262              type = FAT_FLOAT;
     263              break;
     264            default:
     265              if (*format == '\0')
     266                {
     267                  *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     268                  FDI_SET (format - 1, FMTDIR_ERROR);
     269                }
     270              else
     271                {
     272                  *invalid_reason =
     273                    INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
     274                  FDI_SET (format, FMTDIR_ERROR);
     275                }
     276              goto bad_format;
     277            }
     278  
     279          if (name != NULL)
     280            {
     281              /* Named argument.  */
     282  
     283              /* Named and unnamed specifications are exclusive.  */
     284              if (spec.unnamed_arg_count > 0)
     285                {
     286                  *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
     287                  FDI_SET (format, FMTDIR_ERROR);
     288                  goto bad_format;
     289                }
     290  
     291              if (allocated == spec.named_arg_count)
     292                {
     293                  allocated = 2 * allocated + 1;
     294                  spec.named = (struct named_arg *) xrealloc (spec.named, allocated * sizeof (struct named_arg));
     295                }
     296              spec.named[spec.named_arg_count].name = name;
     297              spec.named[spec.named_arg_count].type = type;
     298              spec.named_arg_count++;
     299            }
     300          else if (*format != '%')
     301            {
     302              /* Unnamed argument.  */
     303  
     304              /* Named and unnamed specifications are exclusive.  */
     305              if (spec.named_arg_count > 0)
     306                {
     307                  *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
     308                  FDI_SET (format, FMTDIR_ERROR);
     309                  goto bad_format;
     310                }
     311  
     312              if (allocated == spec.unnamed_arg_count)
     313                {
     314                  allocated = 2 * allocated + 1;
     315                  spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
     316                }
     317              spec.unnamed[spec.unnamed_arg_count].type = type;
     318              spec.unnamed_arg_count++;
     319            }
     320  
     321          FDI_SET (format, FMTDIR_END);
     322  
     323          format++;
     324        }
     325  
     326    /* Sort the named argument array, and eliminate duplicates.  */
     327    if (spec.named_arg_count > 1)
     328      {
     329        unsigned int i, j;
     330        bool err;
     331  
     332        qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
     333               named_arg_compare);
     334  
     335        /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
     336        err = false;
     337        for (i = j = 0; i < spec.named_arg_count; i++)
     338          if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
     339            {
     340              enum format_arg_type type1 = spec.named[i].type;
     341              enum format_arg_type type2 = spec.named[j-1].type;
     342              enum format_arg_type type_both;
     343  
     344              if (type1 == type2 || type2 == FAT_ANY)
     345                type_both = type1;
     346              else if (type1 == FAT_ANY)
     347                type_both = type2;
     348              else
     349                {
     350                  /* Incompatible types.  */
     351                  type_both = FAT_NONE;
     352                  if (!err)
     353                    *invalid_reason =
     354                      xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
     355                  err = true;
     356                }
     357  
     358              spec.named[j-1].type = type_both;
     359              free (spec.named[i].name);
     360            }
     361          else
     362            {
     363              if (j < i)
     364                {
     365                  spec.named[j].name = spec.named[i].name;
     366                  spec.named[j].type = spec.named[i].type;
     367                }
     368              j++;
     369            }
     370        spec.named_arg_count = j;
     371        if (err)
     372          /* *invalid_reason has already been set above.  */
     373          goto bad_format;
     374      }
     375  
     376    result = XMALLOC (struct spec);
     377    *result = spec;
     378    return result;
     379  
     380   bad_format:
     381    if (spec.named != NULL)
     382      {
     383        unsigned int i;
     384        for (i = 0; i < spec.named_arg_count; i++)
     385          free (spec.named[i].name);
     386        free (spec.named);
     387      }
     388    if (spec.unnamed != NULL)
     389      free (spec.unnamed);
     390    return NULL;
     391  }
     392  
     393  static void
     394  format_free (void *descr)
     395  {
     396    struct spec *spec = (struct spec *) descr;
     397  
     398    if (spec->named != NULL)
     399      {
     400        unsigned int i;
     401        for (i = 0; i < spec->named_arg_count; i++)
     402          free (spec->named[i].name);
     403        free (spec->named);
     404      }
     405    if (spec->unnamed != NULL)
     406      free (spec->unnamed);
     407    free (spec);
     408  }
     409  
     410  static int
     411  format_get_number_of_directives (void *descr)
     412  {
     413    struct spec *spec = (struct spec *) descr;
     414  
     415    return spec->directives;
     416  }
     417  
     418  static bool
     419  format_check (void *msgid_descr, void *msgstr_descr, bool equality,
     420                formatstring_error_logger_t error_logger,
     421                const char *pretty_msgid, const char *pretty_msgstr)
     422  {
     423    struct spec *spec1 = (struct spec *) msgid_descr;
     424    struct spec *spec2 = (struct spec *) msgstr_descr;
     425    bool err = false;
     426  
     427    if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
     428      {
     429        if (error_logger)
     430          error_logger (_("format specifications in '%s' expect a mapping, those in '%s' expect a tuple"),
     431                        pretty_msgid, pretty_msgstr);
     432        err = true;
     433      }
     434    else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
     435      {
     436        if (error_logger)
     437          error_logger (_("format specifications in '%s' expect a tuple, those in '%s' expect a mapping"),
     438                        pretty_msgid, pretty_msgstr);
     439        err = true;
     440      }
     441    else
     442      {
     443        if (spec1->named_arg_count + spec2->named_arg_count > 0)
     444          {
     445            unsigned int i, j;
     446            unsigned int n1 = spec1->named_arg_count;
     447            unsigned int n2 = spec2->named_arg_count;
     448  
     449            /* Check that the argument names are the same.
     450               Both arrays are sorted.  We search for the first difference.  */
     451            for (i = 0, j = 0; i < n1 || j < n2; )
     452              {
     453                int cmp = (i >= n1 ? 1 :
     454                           j >= n2 ? -1 :
     455                           strcmp (spec1->named[i].name, spec2->named[j].name));
     456  
     457                if (cmp > 0)
     458                  {
     459                    if (error_logger)
     460                      error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
     461                                    spec2->named[j].name, pretty_msgstr,
     462                                    pretty_msgid);
     463                    err = true;
     464                    break;
     465                  }
     466                else if (cmp < 0)
     467                  {
     468                    if (equality)
     469                      {
     470                        if (error_logger)
     471                          error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
     472                                        spec1->named[i].name, pretty_msgstr);
     473                        err = true;
     474                        break;
     475                      }
     476                    else
     477                      i++;
     478                  }
     479                else
     480                  j++, i++;
     481              }
     482            /* Check the argument types are the same.  */
     483            if (!err)
     484              for (i = 0, j = 0; j < n2; )
     485                {
     486                  if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
     487                    {
     488                      if (!(spec1->named[i].type == spec2->named[j].type
     489                            || (!equality
     490                                && (spec1->named[i].type == FAT_ANY
     491                                    || spec2->named[j].type == FAT_ANY))))
     492                        {
     493                          if (error_logger)
     494                            error_logger (_("format specifications in '%s' and '%s' for argument '%s' are not the same"),
     495                                          pretty_msgid, pretty_msgstr,
     496                                          spec2->named[j].name);
     497                          err = true;
     498                          break;
     499                        }
     500                      j++, i++;
     501                    }
     502                  else
     503                    i++;
     504                }
     505          }
     506  
     507        if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
     508          {
     509            unsigned int i;
     510  
     511            /* Check the argument types are the same.  */
     512            if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
     513              {
     514                if (error_logger)
     515                  error_logger (_("number of format specifications in '%s' and '%s' does not match"),
     516                                pretty_msgid, pretty_msgstr);
     517                err = true;
     518              }
     519            else
     520              for (i = 0; i < spec2->unnamed_arg_count; i++)
     521                if (!(spec1->unnamed[i].type == spec2->unnamed[i].type
     522                      || (!equality
     523                          && (spec1->unnamed[i].type == FAT_ANY
     524                              || spec2->unnamed[i].type == FAT_ANY))))
     525                  {
     526                    if (error_logger)
     527                      error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
     528                                    pretty_msgid, pretty_msgstr, i + 1);
     529                    err = true;
     530                  }
     531          }
     532      }
     533  
     534    return err;
     535  }
     536  
     537  
/* Parser descriptor for Python format strings.  The initializer is
   positional; the member names belong to 'struct formatstring_parser',
   which is declared outside this file (presumably in format.h).  */
struct formatstring_parser formatstring_python =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,                         /* no function supplied for the fourth member */
  format_check
};
     546  
     547  
     548  unsigned int
     549  get_python_format_unnamed_arg_count (const char *string)
     550  {
     551    /* Parse the format string.  */
     552    char *invalid_reason = NULL;
     553    struct spec *descr =
     554      (struct spec *) format_parse (string, false, NULL, &invalid_reason);
     555  
     556    if (descr != NULL)
     557      {
     558        unsigned int result = descr->unnamed_arg_count;
     559  
     560        format_free (descr);
     561        return result;
     562      }
     563    else
     564      {
     565        free (invalid_reason);
     566        return 0;
     567      }
     568  }
     569  
     570  
     571  #ifdef TEST
     572  
     573  /* Test program: Print the argument list specification returned by
     574     format_parse for strings read from standard input.  */
     575  
     576  #include <stdio.h>
     577  
     578  static void
     579  format_print (void *descr)
     580  {
     581    struct spec *spec = (struct spec *) descr;
     582    unsigned int i;
     583  
     584    if (spec == NULL)
     585      {
     586        printf ("INVALID");
     587        return;
     588      }
     589  
     590    if (spec->named_arg_count > 0)
     591      {
     592        if (spec->unnamed_arg_count > 0)
     593          abort ();
     594  
     595        printf ("{");
     596        for (i = 0; i < spec->named_arg_count; i++)
     597          {
     598            if (i > 0)
     599              printf (", ");
     600            printf ("'%s':", spec->named[i].name);
     601            switch (spec->named[i].type)
     602              {
     603              case FAT_ANY:
     604                printf ("*");
     605                break;
     606              case FAT_CHARACTER:
     607                printf ("c");
     608                break;
     609              case FAT_STRING:
     610                printf ("s");
     611                break;
     612              case FAT_INTEGER:
     613                printf ("i");
     614                break;
     615              case FAT_FLOAT:
     616                printf ("f");
     617                break;
     618              default:
     619                abort ();
     620              }
     621          }
     622        printf ("}");
     623      }
     624    else
     625      {
     626        printf ("(");
     627        for (i = 0; i < spec->unnamed_arg_count; i++)
     628          {
     629            if (i > 0)
     630              printf (" ");
     631            switch (spec->unnamed[i].type)
     632              {
     633              case FAT_ANY:
     634                printf ("*");
     635                break;
     636              case FAT_CHARACTER:
     637                printf ("c");
     638                break;
     639              case FAT_STRING:
     640                printf ("s");
     641                break;
     642              case FAT_INTEGER:
     643                printf ("i");
     644                break;
     645              case FAT_FLOAT:
     646                printf ("f");
     647                break;
     648              default:
     649                abort ();
     650              }
     651          }
     652        printf (")");
     653      }
     654  }
     655  
     656  int
     657  main ()
     658  {
     659    for (;;)
     660      {
     661        char *line = NULL;
     662        size_t line_size = 0;
     663        int line_len;
     664        char *invalid_reason;
     665        void *descr;
     666  
     667        line_len = getline (&line, &line_size, stdin);
     668        if (line_len < 0)
     669          break;
     670        if (line_len > 0 && line[line_len - 1] == '\n')
     671          line[--line_len] = '\0';
     672  
     673        invalid_reason = NULL;
     674        descr = format_parse (line, false, NULL, &invalid_reason);
     675  
     676        format_print (descr);
     677        printf ("\n");
     678        if (descr == NULL)
     679          printf ("%s\n", invalid_reason);
     680  
     681        free (invalid_reason);
     682        free (line);
     683      }
     684  
     685    return 0;
     686  }
     687  
     688  /*
     689   * For Emacs M-x compile
     690   * Local Variables:
     691   * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la"
     692   * End:
     693   */
     694  
     695  #endif /* TEST */