(root)/
glibc-2.38/
stdio-common/
printf-parsemb.c
       1  /* Helper functions for parsing printf format strings.
       2     Copyright (C) 1995-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <ctype.h>
      20  #include <limits.h>
      21  #include <stdlib.h>
      22  #include <string.h>
      23  #include <sys/param.h>
      24  #include <wchar.h>
      25  #include <wctype.h>
      26  
      27  #ifndef COMPILE_WPRINTF  /* Narrow build: format strings are made of char.  */
      28  # define CHAR_T		char
      29  # define UCHAR_T	unsigned char
      30  # define INT_T		int
      31  # define L_(Str)	Str
      32  # define ISDIGIT(Ch)	isdigit (Ch)
      33  # define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_mb
      34  #else
          /* Wide build (COMPILE_WPRINTF defined): format strings are made of
             wchar_t, character literals get an L prefix via L_, and digits are
             classified with iswdigit.  */
      35  # define CHAR_T		wchar_t
      36  # define UCHAR_T	unsigned int
      37  # define INT_T		wint_t
      38  # define L_(Str)	L##Str
      39  # define ISDIGIT(Ch)	iswdigit (Ch)
      40  # define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_wc
      41  #endif
      42  
      43  #include "printf-parse.h"
      44  
      45  #define NDEBUG 1  /* Must precede <assert.h>: makes assert expand to nothing.  */
      46  #include <assert.h>
      47  
      48  
      49  
      50  /* FORMAT must point to a '%' at the beginning of a spec.  Fills in *SPEC
      51     with the parsed details.  POSN is the number of arguments already
      52     consumed.  At most MAXTYPES - POSN types are filled in TYPES.  Return
      53     the number of args consumed by this spec; *MAX_REF_ARG is updated so it
      54     remains the highest argument index used.  */
      55  size_t
      56  attribute_hidden
      57  #ifdef COMPILE_WPRINTF
      58  __parse_one_specwc (const UCHAR_T *format, size_t posn,
      59  		    struct printf_spec *spec, size_t *max_ref_arg,
      60  		    bool *failed)
      61  #else
      62  __parse_one_specmb (const UCHAR_T *format, size_t posn,
      63  		    struct printf_spec *spec, size_t *max_ref_arg,
      64  		    bool *failed)
      65  #endif
      66  {
      67    unsigned int n;
      68    size_t nargs = 0;
      69    bool is_fast;
      70  
      71    /* Skip the '%'.  */
      72    ++format;
      73  
      74    /* Clear information structure.  */
      75    spec->data_arg = -1;
      76    spec->info.alt = 0;
      77    spec->info.space = 0;
      78    spec->info.left = 0;
      79    spec->info.showsign = 0;
      80    spec->info.group = 0;
      81    spec->info.i18n = 0;
      82    spec->info.extra = 0;
      83    spec->info.pad = ' ';
      84    spec->info.wide = sizeof (UCHAR_T) > 1;
      85    spec->info.is_binary128 = 0;
      86  
      87    *failed = false;
      88  
      89    /* Test for positional argument.  */
      90    if (ISDIGIT (*format))
      91      {
      92        const UCHAR_T *begin = format;
      93  
      94        n = read_int (&format);
      95  
      96        if (n != 0 && *format == L_('$'))
      97  	/* Is positional parameter.  */
      98  	{
      99  	  ++format;		/* Skip the '$'.  */
     100  	  if (n != -1)
     101  	    {
     102  	      spec->data_arg = n - 1;
     103  	      *max_ref_arg = MAX (*max_ref_arg, n);
     104  	    }
     105  	}
     106        else
     107  	/* Oops; that was actually the width and/or 0 padding flag.
     108  	   Step back and read it again.  */
     109  	format = begin;
     110      }
     111  
     112    /* Check for spec modifiers.  */
     113    do
     114      {
     115        switch (*format)
     116  	{
     117  	case L_(' '):
     118  	  /* Output a space in place of a sign, when there is no sign.  */
     119  	  spec->info.space = 1;
     120  	  continue;
     121  	case L_('+'):
     122  	  /* Always output + or - for numbers.  */
     123  	  spec->info.showsign = 1;
     124  	  continue;
     125  	case L_('-'):
     126  	  /* Left-justify things.  */
     127  	  spec->info.left = 1;
     128  	  continue;
     129  	case L_('#'):
     130  	  /* Use the "alternate form":
     131  	     Hex has 0x or 0X, FP always has a decimal point.  */
     132  	  spec->info.alt = 1;
     133  	  continue;
     134  	case L_('0'):
     135  	  /* Pad with 0s.  */
     136  	  spec->info.pad = '0';
     137  	  continue;
     138  	case L_('\''):
     139  	  /* Show grouping in numbers if the locale information
     140  	     indicates any.  */
     141  	  spec->info.group = 1;
     142  	  continue;
     143  	case L_('I'):
     144  	  /* Use the internationalized form of the output.  Currently
     145  	     means to use the `outdigits' of the current locale.  */
     146  	  spec->info.i18n = 1;
     147  	  continue;
     148  	default:
     149  	  break;
     150  	}
     151        break;
     152      }
     153    while (*++format);
     154  
     155    if (spec->info.left)
     156      spec->info.pad = ' ';
     157  
     158    /* Get the field width.  */
     159    spec->width_arg = -1;
     160    spec->info.width = 0;
     161    if (*format == L_('*'))
     162      {
     163        /* The field width is given in an argument.
     164  	 A negative field width indicates left justification.  */
     165        const UCHAR_T *begin = ++format;
     166  
     167        if (ISDIGIT (*format))
     168  	{
     169  	  /* The width argument might be found in a positional parameter.  */
     170  	  n = read_int (&format);
     171  
     172  	  if (n != 0 && *format == L_('$'))
     173  	    {
     174  	      if (n != -1)
     175  		{
     176  		  spec->width_arg = n - 1;
     177  		  *max_ref_arg = MAX (*max_ref_arg, n);
     178  		}
     179  	      ++format;		/* Skip '$'.  */
     180  	    }
     181  	}
     182  
     183        if (spec->width_arg < 0)
     184  	{
     185  	  /* Not in a positional parameter.  Consume one argument.  */
     186  	  spec->width_arg = posn++;
     187  	  ++nargs;
     188  	  format = begin;	/* Step back and reread.  */
     189  	}
     190      }
     191    else if (ISDIGIT (*format))
     192      {
     193        int n = read_int (&format);
     194  
     195        /* Constant width specification.  */
     196        if (n != -1)
     197  	spec->info.width = n;
     198      }
     199    /* Get the precision.  */
     200    spec->prec_arg = -1;
     201    /* -1 means none given; 0 means explicit 0.  */
     202    spec->info.prec = -1;
     203    if (*format == L_('.'))
     204      {
     205        ++format;
     206        if (*format == L_('*'))
     207  	{
     208  	  /* The precision is given in an argument.  */
     209  	  const UCHAR_T *begin = ++format;
     210  
     211  	  if (ISDIGIT (*format))
     212  	    {
     213  	      n = read_int (&format);
     214  
     215  	      if (n != 0 && *format == L_('$'))
     216  		{
     217  		  if (n != -1)
     218  		    {
     219  		      spec->prec_arg = n - 1;
     220  		      *max_ref_arg = MAX (*max_ref_arg, n);
     221  		    }
     222  		  ++format;
     223  		}
     224  	    }
     225  
     226  	  if (spec->prec_arg < 0)
     227  	    {
     228  	      /* Not in a positional parameter.  */
     229  	      spec->prec_arg = posn++;
     230  	      ++nargs;
     231  	      format = begin;
     232  	    }
     233  	}
     234        else if (ISDIGIT (*format))
     235  	{
     236  	  int n = read_int (&format);
     237  
     238  	  if (n != -1)
     239  	    spec->info.prec = n;
     240  	}
     241        else
     242  	/* "%.?" is treated like "%.0?".  */
     243  	spec->info.prec = 0;
     244      }
     245  
     246    /* Check for type modifiers.  */
     247    spec->info.is_long_double = 0;
     248    spec->info.is_short = 0;
     249    spec->info.is_long = 0;
     250    spec->info.is_char = 0;
     251    spec->info.user = 0;
     252  
     253    if (__builtin_expect (__printf_modifier_table == NULL, 1)
     254        || __printf_modifier_table[*format] == NULL
     255        || HANDLE_REGISTERED_MODIFIER (&format, &spec->info) != 0)
     256      switch (*format++)
     257        {
     258        case L_('h'):
     259  	/* ints are short ints or chars.  */
     260  	if (*format != L_('h'))
     261  	  spec->info.is_short = 1;
     262  	else
     263  	  {
     264  	    ++format;
     265  	    spec->info.is_char = 1;
     266  	  }
     267  	break;
     268        case L_('l'):
     269  	/* ints are long ints.  */
     270  	spec->info.is_long = 1;
     271  	if (*format != L_('l'))
     272  	  break;
     273  	++format;
     274  	/* FALLTHROUGH */
     275        case L_('L'):
     276  	/* doubles are long doubles, and ints are long long ints.  */
     277        case L_('q'):
     278  	/* 4.4 uses this for long long.  */
     279  	spec->info.is_long_double = 1;
     280  	break;
     281        case L_('z'):
     282        case L_('Z'):
     283  	/* ints are size_ts.  */
     284  	assert (sizeof (size_t) <= sizeof (unsigned long long int));
     285  #if LONG_MAX != LONG_LONG_MAX
     286  	spec->info.is_long_double = (sizeof (size_t)
     287  				     > sizeof (unsigned long int));
     288  #endif
     289  	spec->info.is_long = sizeof (size_t) > sizeof (unsigned int);
     290  	break;
     291        case L_('t'):
     292  	assert (sizeof (ptrdiff_t) <= sizeof (long long int));
     293  #if LONG_MAX != LONG_LONG_MAX
     294  	spec->info.is_long_double = (sizeof (ptrdiff_t) > sizeof (long int));
     295  #endif
     296  	spec->info.is_long = sizeof (ptrdiff_t) > sizeof (int);
     297  	break;
     298        case L_('j'):
     299  	assert (sizeof (uintmax_t) <= sizeof (unsigned long long int));
     300  #if LONG_MAX != LONG_LONG_MAX
     301  	spec->info.is_long_double = (sizeof (uintmax_t)
     302  				     > sizeof (unsigned long int));
     303  #endif
     304  	spec->info.is_long = sizeof (uintmax_t) > sizeof (unsigned int);
     305  	break;
     306        case L_('w'):
     307  	is_fast = false;
     308  	if (*format == L_('f'))
     309  	  {
     310  	    ++format;
     311  	    is_fast = true;
     312  	  }
     313  	int bitwidth = 0;
     314  	if (ISDIGIT (*format))
     315  	  bitwidth = read_int (&format);
     316  	if (is_fast)
     317  	  switch (bitwidth)
     318  	    {
     319  	    case 8:
     320  	      bitwidth = INT_FAST8_WIDTH;
     321  	      break;
     322  	    case 16:
     323  	      bitwidth = INT_FAST16_WIDTH;
     324  	      break;
     325  	    case 32:
     326  	      bitwidth = INT_FAST32_WIDTH;
     327  	      break;
     328  	    case 64:
     329  	      bitwidth = INT_FAST64_WIDTH;
     330  	      break;
     331  	    }
     332  	switch (bitwidth)
     333  	  {
     334  	  case 8:
     335  	    spec->info.is_char = 1;
     336  	    break;
     337  	  case 16:
     338  	    spec->info.is_short = 1;
     339  	    break;
     340  	  case 32:
     341  	    break;
     342  	  case 64:
     343  	    spec->info.is_long_double = 1;
     344  	    spec->info.is_long = 1;
     345  	    break;
     346  	  default:
     347  	    /* ISO C requires this error to be detected.  */
     348  	    __set_errno (EINVAL);
     349  	    *failed = true;
     350  	    break;
     351  	  }
     352  	break;
     353        default:
     354  	/* Not a recognized modifier.  Backup.  */
     355  	--format;
     356  	break;
     357        }
     358  
     359    /* Get the format specification.  */
     360    spec->info.spec = (wchar_t) *format++;
     361    spec->size = -1;
     362    if (__builtin_expect (__printf_function_table == NULL, 1)
     363        || spec->info.spec > UCHAR_MAX
     364        || __printf_arginfo_table[spec->info.spec] == NULL
     365        /* We don't try to get the types for all arguments if the format
     366  	 uses more than one.  The normal case is covered though.  If
     367  	 the call returns -1 we continue with the normal specifiers.  */
     368        || (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec])
     369  				   (&spec->info, 1, &spec->data_arg_type,
     370  				    &spec->size)) < 0)
     371      {
     372        /* Find the data argument types of a built-in spec.  */
     373        spec->ndata_args = 1;
     374  
     375        switch (spec->info.spec)
     376  	{
     377  	case L'i':
     378  	case L'd':
     379  	case L'u':
     380  	case L'o':
     381  	case L'X':
     382  	case L'x':
     383  	case L'B':
     384  	case L'b':
     385  #if LONG_MAX != LONG_LONG_MAX
     386  	  if (spec->info.is_long_double)
     387  	    spec->data_arg_type = PA_INT|PA_FLAG_LONG_LONG;
     388  	  else
     389  #endif
     390  	    if (spec->info.is_long)
     391  	      spec->data_arg_type = PA_INT|PA_FLAG_LONG;
     392  	    else if (spec->info.is_short)
     393  	      spec->data_arg_type = PA_INT|PA_FLAG_SHORT;
     394  	    else if (spec->info.is_char)
     395  	      spec->data_arg_type = PA_CHAR;
     396  	    else
     397  	      spec->data_arg_type = PA_INT;
     398  	  break;
     399  	case L'e':
     400  	case L'E':
     401  	case L'f':
     402  	case L'F':
     403  	case L'g':
     404  	case L'G':
     405  	case L'a':
     406  	case L'A':
     407  	  if (spec->info.is_long_double)
     408  	    spec->data_arg_type = PA_DOUBLE|PA_FLAG_LONG_DOUBLE;
     409  	  else
     410  	    spec->data_arg_type = PA_DOUBLE;
     411  	  break;
     412  	case L'c':
     413  	  spec->data_arg_type = PA_CHAR;
     414  	  break;
     415  	case L'C':
     416  	  spec->data_arg_type = PA_WCHAR;
     417  	  break;
     418  	case L's':
     419  	  spec->data_arg_type = PA_STRING;
     420  	  break;
     421  	case L'S':
     422  	  spec->data_arg_type = PA_WSTRING;
     423  	  break;
     424  	case L'p':
     425  	  spec->data_arg_type = PA_POINTER;
     426  	  break;
     427  	case L'n':
     428  	  spec->data_arg_type = PA_INT|PA_FLAG_PTR;
     429  	  break;
     430  
     431  	case L'm':
     432  	default:
     433  	  /* An unknown spec will consume no args.  */
     434  	  spec->ndata_args = 0;
     435  	  break;
     436  	}
     437      }
     438  
     439    if (spec->data_arg == -1 && spec->ndata_args > 0)
     440      {
     441        /* There are args consumed, but no positional spec.  Use the
     442  	 next sequential arg position.  */
     443        spec->data_arg = posn;
     444        nargs += spec->ndata_args;
     445      }
     446  
     447    if (spec->info.spec == L'\0')
     448      /* Format ended before this spec was complete.  */
     449      spec->end_of_fmt = spec->next_fmt = format - 1;
     450    else
     451      {
     452        /* Find the next format spec.  */
     453        spec->end_of_fmt = format;
     454  #ifdef COMPILE_WPRINTF
     455        spec->next_fmt = __find_specwc (format);
     456  #else
     457        spec->next_fmt = __find_specmb (format);
     458  #endif
     459      }
     460  
     461    return nargs;
     462  }