(root)/
gettext-0.22.4/
gettext-tools/
src/
format-sh.c
       1  /* Shell format strings.
       2     Copyright (C) 2003-2004, 2006-2007, 2009, 2019-2020, 2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2003.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include <config.h>
      20  #endif
      21  
      22  #include <stdbool.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  
      26  #include "format.h"
      27  #include "c-ctype.h"
      28  #include "xalloc.h"
      29  #include "format-invalid.h"
      30  #include "gettext.h"
      31  
      32  #define _(str) gettext (str)
      33  
      34  /* Shell format strings are simply strings subject to variable substitution.
      35     A variable substitution starts with '$' and is finished by either
      36     - a nonempty sequence of alphanumeric or underscore ASCII characters, the
      37       first being not a digit, or
      38     - an opening brace '{', a nonempty sequence of alphanumeric or underscore
      39       ASCII characters, the first being not a digit, and a closing brace '}'.
      40     We don't support variable references like $1, $$ or $? since they make
      41     no sense when 'envsubst' is invoked.
      42     We don't support non-ASCII variable names, to avoid dependencies w.r.t. the
      43     current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1
      44     encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030,
      45     SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these
      46     encodings.
      47     We don't support the POSIX syntax for default or alternate values:
      48       ${variable-default}        ${variable:-default}
      49       ${variable=default}        ${variable:=default}
      50       ${variable+replacement}    ${variable:+replacement}
      51       ${variable?ignored}        ${variable:?ignored}
      52     because the translator might be tempted to change the default value; if
      53     we allow it we have a security problem; if we don't allow it the translator
      54     will be surprised.
      55   */
      56  
      57  struct named_arg
      58  {
      59    char *name;
      60  };
      61  
      62  struct spec
      63  {
      64    unsigned int directives;
      65    unsigned int named_arg_count;
      66    struct named_arg *named;
      67  };
      68  
      69  
      70  static int
      71  named_arg_compare (const void *p1, const void *p2)
      72  {
      73    return strcmp (((const struct named_arg *) p1)->name,
      74                   ((const struct named_arg *) p2)->name);
      75  }
      76  
      77  #define INVALID_NON_ASCII_VARIABLE() \
      78    xstrdup (_("The string refers to a shell variable with a non-ASCII name."))
      79  #define INVALID_SHELL_SYNTAX() \
      80    xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons."))
      81  #define INVALID_CONTEXT_DEPENDENT_VARIABLE() \
      82    xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions."))
      83  #define INVALID_EMPTY_VARIABLE() \
      84    xstrdup (_("The string refers to a shell variable with an empty name."))
      85  
      86  static void *
      87  format_parse (const char *format, bool translated, char *fdi,
      88                char **invalid_reason)
      89  {
      90    const char *const format_start = format;
      91    struct spec spec;
      92    unsigned int named_allocated;
      93    struct spec *result;
      94  
      95    spec.directives = 0;
      96    spec.named_arg_count = 0;
      97    spec.named = NULL;
      98    named_allocated = 0;
      99  
     100    for (; *format != '\0';)
     101      if (*format++ == '$')
     102        {
     103          /* A variable substitution.  */
     104          char *name;
     105  
     106          FDI_SET (format - 1, FMTDIR_START);
     107          spec.directives++;
     108  
     109          if (*format == '{')
     110            {
     111              const char *name_start;
     112              const char *name_end;
     113              size_t n;
     114  
     115              name_start = ++format;
     116              for (; *format != '\0'; format++)
     117                {
     118                  if (*format == '}')
     119                    break;
     120                  if (!c_isascii (*format))
     121                    {
     122                      *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
     123                      FDI_SET (format, FMTDIR_ERROR);
     124                      goto bad_format;
     125                    }
     126                  if (format > name_start
     127                      && (*format == '-' || *format == '=' || *format == '+'
     128                          || *format == '?' || *format == ':'))
     129                    {
     130                      *invalid_reason = INVALID_SHELL_SYNTAX ();
     131                      FDI_SET (format, FMTDIR_ERROR);
     132                      goto bad_format;
     133                    }
     134                  if (!(c_isalnum (*format) || *format == '_')
     135                      || (format == name_start && c_isdigit (*format)))
     136                    {
     137                      *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
     138                      FDI_SET (format, FMTDIR_ERROR);
     139                      goto bad_format;
     140                    }
     141                }
     142              if (*format == '\0')
     143                {
     144                  *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     145                  FDI_SET (format - 1, FMTDIR_ERROR);
     146                  goto bad_format;
     147                }
     148              name_end = format++;
     149  
     150              n = name_end - name_start;
     151              if (n == 0)
     152                {
     153                  *invalid_reason = INVALID_EMPTY_VARIABLE ();
     154                  FDI_SET (format - 1, FMTDIR_ERROR);
     155                  goto bad_format;
     156                }
     157              name = XNMALLOC (n + 1, char);
     158              memcpy (name, name_start, n);
     159              name[n] = '\0';
     160            }
     161          else if (c_isalpha (*format) || *format == '_')
     162            {
     163              const char *name_start;
     164              const char *name_end;
     165              size_t n;
     166  
     167              name_start = format;
     168              do
     169                format++;
     170              while (*format != '\0' && (c_isalnum (*format) || *format == '_'));
     171              name_end = format;
     172  
     173              n = name_end - name_start;
     174              name = XNMALLOC (n + 1, char);
     175              memcpy (name, name_start, n);
     176              name[n] = '\0';
     177            }
     178          else if (*format != '\0')
     179            {
     180              if (!c_isascii (*format))
     181                {
     182                  *invalid_reason = INVALID_NON_ASCII_VARIABLE ();
     183                  FDI_SET (format, FMTDIR_ERROR);
     184                  goto bad_format;
     185                }
     186              else
     187                {
     188                  *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE ();
     189                  FDI_SET (format, FMTDIR_ERROR);
     190                  goto bad_format;
     191                }
     192            }
     193          else
     194            {
     195              *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
     196              FDI_SET (format - 1, FMTDIR_ERROR);
     197              goto bad_format;
     198            }
     199  
     200          /* Named argument.  */
     201          if (named_allocated == spec.named_arg_count)
     202            {
     203              named_allocated = 2 * named_allocated + 1;
     204              spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg));
     205            }
     206          spec.named[spec.named_arg_count].name = name;
     207          spec.named_arg_count++;
     208  
     209          FDI_SET (format - 1, FMTDIR_END);
     210        }
     211  
     212    /* Sort the named argument array, and eliminate duplicates.  */
     213    if (spec.named_arg_count > 1)
     214      {
     215        unsigned int i, j;
     216  
     217        qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
     218               named_arg_compare);
     219  
     220        /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
     221        for (i = j = 0; i < spec.named_arg_count; i++)
     222          if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
     223            free (spec.named[i].name);
     224          else
     225            {
     226              if (j < i)
     227                spec.named[j].name = spec.named[i].name;
     228              j++;
     229            }
     230        spec.named_arg_count = j;
     231      }
     232  
     233    result = XMALLOC (struct spec);
     234    *result = spec;
     235    return result;
     236  
     237   bad_format:
     238    if (spec.named != NULL)
     239      {
     240        unsigned int i;
     241        for (i = 0; i < spec.named_arg_count; i++)
     242          free (spec.named[i].name);
     243        free (spec.named);
     244      }
     245    return NULL;
     246  }
     247  
     248  static void
     249  format_free (void *descr)
     250  {
     251    struct spec *spec = (struct spec *) descr;
     252  
     253    if (spec->named != NULL)
     254      {
     255        unsigned int i;
     256        for (i = 0; i < spec->named_arg_count; i++)
     257          free (spec->named[i].name);
     258        free (spec->named);
     259      }
     260    free (spec);
     261  }
     262  
     263  static int
     264  format_get_number_of_directives (void *descr)
     265  {
     266    struct spec *spec = (struct spec *) descr;
     267  
     268    return spec->directives;
     269  }
     270  
     271  static bool
     272  format_check (void *msgid_descr, void *msgstr_descr, bool equality,
     273                formatstring_error_logger_t error_logger,
     274                const char *pretty_msgid, const char *pretty_msgstr)
     275  {
     276    struct spec *spec1 = (struct spec *) msgid_descr;
     277    struct spec *spec2 = (struct spec *) msgstr_descr;
     278    bool err = false;
     279  
     280    if (spec1->named_arg_count + spec2->named_arg_count > 0)
     281      {
     282        unsigned int i, j;
     283        unsigned int n1 = spec1->named_arg_count;
     284        unsigned int n2 = spec2->named_arg_count;
     285  
     286        /* Check that the argument names are the same.
     287           Both arrays are sorted.  We search for the first difference.  */
     288        for (i = 0, j = 0; i < n1 || j < n2; )
     289          {
     290            int cmp = (i >= n1 ? 1 :
     291                       j >= n2 ? -1 :
     292                       strcmp (spec1->named[i].name, spec2->named[j].name));
     293  
     294            if (cmp > 0)
     295              {
     296                if (error_logger)
     297                  error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
     298                                spec2->named[j].name, pretty_msgstr,
     299                                pretty_msgid);
     300                err = true;
     301                break;
     302              }
     303            else if (cmp < 0)
     304              {
     305                if (equality)
     306                  {
     307                    if (error_logger)
     308                      error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
     309                                    spec1->named[i].name, pretty_msgstr);
     310                    err = true;
     311                    break;
     312                  }
     313                else
     314                  i++;
     315              }
     316            else
     317              j++, i++;
     318          }
     319      }
     320  
     321    return err;
     322  }
     323  
     324  
     325  struct formatstring_parser formatstring_sh =
     326  {
     327    format_parse,
     328    format_free,
     329    format_get_number_of_directives,
     330    NULL,
     331    format_check
     332  };
     333  
     334  
     335  #ifdef TEST
     336  
     337  /* Test program: Print the argument list specification returned by
     338     format_parse for strings read from standard input.  */
     339  
     340  #include <stdio.h>
     341  
     342  static void
     343  format_print (void *descr)
     344  {
     345    struct spec *spec = (struct spec *) descr;
     346    unsigned int i;
     347  
     348    if (spec == NULL)
     349      {
     350        printf ("INVALID");
     351        return;
     352      }
     353  
     354    printf ("{");
     355    for (i = 0; i < spec->named_arg_count; i++)
     356      {
     357        if (i > 0)
     358          printf (", ");
     359        printf ("'%s'", spec->named[i].name);
     360      }
     361    printf ("}");
     362  }
     363  
     364  int
     365  main ()
     366  {
     367    for (;;)
     368      {
     369        char *line = NULL;
     370        size_t line_size = 0;
     371        int line_len;
     372        char *invalid_reason;
     373        void *descr;
     374  
     375        line_len = getline (&line, &line_size, stdin);
     376        if (line_len < 0)
     377          break;
     378        if (line_len > 0 && line[line_len - 1] == '\n')
     379          line[--line_len] = '\0';
     380  
     381        invalid_reason = NULL;
     382        descr = format_parse (line, false, NULL, &invalid_reason);
     383  
     384        format_print (descr);
     385        printf ("\n");
     386        if (descr == NULL)
     387          printf ("%s\n", invalid_reason);
     388  
     389        free (invalid_reason);
     390        free (line);
     391      }
     392  
     393    return 0;
     394  }
     395  
     396  /*
     397   * For Emacs M-x compile
     398   * Local Variables:
     399   * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-sh.c ../gnulib-lib/libgettextlib.la"
     400   * End:
     401   */
     402  
     403  #endif /* TEST */