1  /* __gmp_doprnt -- printf style formatted output.
       2  
       3     THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
       4     CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
       5     FUTURE GNU MP RELEASES.
       6  
       7  Copyright 2001-2003 Free Software Foundation, Inc.
       8  
       9  This file is part of the GNU MP Library.
      10  
      11  The GNU MP Library is free software; you can redistribute it and/or modify
      12  it under the terms of either:
      13  
      14    * the GNU Lesser General Public License as published by the Free
      15      Software Foundation; either version 3 of the License, or (at your
      16      option) any later version.
      17  
      18  or
      19  
      20    * the GNU General Public License as published by the Free Software
      21      Foundation; either version 2 of the License, or (at your option) any
      22      later version.
      23  
      24  or both in parallel, as here.
      25  
      26  The GNU MP Library is distributed in the hope that it will be useful, but
      27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      29  for more details.
      30  
      31  You should have received copies of the GNU General Public License and the
      32  GNU Lesser General Public License along with the GNU MP Library.  If not,
      33  see https://www.gnu.org/licenses/.  */
      34  
      35  #define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
      36  
      37  #include "config.h"	/* needed for the HAVE_, could also move gmp incls */
      38  
      39  #include <stdarg.h>
      40  #include <ctype.h>     /* for isdigit */
      41  #include <stddef.h>    /* for ptrdiff_t */
      42  #include <string.h>
      43  #include <stdio.h>     /* for NULL */
      44  #include <stdlib.h>
      45  
      46  #if HAVE_INTTYPES_H
      47  # include <inttypes.h> /* for intmax_t */
      48  #endif
      49  #if HAVE_STDINT_H
      50  # include <stdint.h>
      51  #endif
      52  
      53  #if HAVE_LANGINFO_H
      54  #include <langinfo.h>  /* for nl_langinfo */
      55  #endif
      56  
      57  #if HAVE_LOCALE_H
      58  #include <locale.h>    /* for localeconv */
      59  #endif
      60  
      61  #if HAVE_SYS_TYPES_H
      62  #include <sys/types.h> /* for quad_t */
      63  #endif
      64  
      65  #include "gmp-impl.h"
      66  
      67  
/* Diagnostic tracing hook.  As defined here TRACE(x) expands to nothing, so
   the tracing statements wrapped in it are dropped by the preprocessor.
   Change this to "#define TRACE(x) x" for diagnostics.  */
#define TRACE(x)
      70  
      71  
      72  /* Should be portable, but in any case this is only used under some ASSERTs. */
      73  #define va_equal(x, y)                           \
      74    (memcmp (&(x), &(y), sizeof(va_list)) == 0)
      75  
      76  
      77  /* printf is convenient because it allows various types to be printed in one
      78     fairly compact call, so having gmp_printf support the standard types as
      79     well as the gmp ones is important.  This ends up meaning all the standard
      80     parsing must be duplicated, to get a new routine recognising the gmp
      81     extras.
      82  
      83     With the currently favoured handling of mpz etc as Z, Q and F type
      84     markers, it's not possible to use glibc register_printf_function since
      85     that only accepts new conversion characters, not new types.  If Z was a
      86     conversion there'd be no way to specify hex, decimal or octal, or
      87     similarly with F no way to specify fixed point or scientific format.
      88  
      89     It seems wisest to pass conversions %f, %e and %g of float, double and
      90     long double over to the standard printf.  It'd be hard to be sure of
      91     getting the right handling for NaNs, rounding, etc.  Integer conversions
      92     %d etc and string conversions %s on the other hand could be easily enough
      93     handled within gmp_doprnt, but if floats are going to libc then it's just
      94     as easy to send all non-gmp types there.
      95  
      96     "Z" was a type marker for size_t in old glibc, but there seems no need to
      97     provide access to that now "z" is standard.
      98  
      99     In GMP 4.1.1 we documented "ll" and "L" as being equivalent, but in C99
     100     in fact "ll" is just for long long and "L" just for long double.
     101     Apparently GLIBC allows "L" for long long though.  This doesn't affect
     102     us as such, since both are passed through to the C library.  To be
     103     consistent with what we said before, the two are treated equivalently
     104     here, and it's left to the C library to do what it thinks with them.
     105  
     106     Possibilities:
     107  
     108     "b" might be nice for binary output, and could even be supported for the
     109     standard C types too if desired.
     110  
     111     POSIX style "%n$" parameter numbering would be possible, but would need
     112     to be handled completely within gmp_doprnt, since the numbering will be
   all different once the format string is cut into pieces.
     114  
     115     Some options for mpq formatting would be good.  Perhaps a non-zero
     116     precision field could give a width for the denominator and mean always
   put a "/".  A form "n+p/q" might be interesting too, though perhaps that's
     118     better left to applications.
     119  
     120     Right now there's no way for an application to know whether types like
     121     intmax_t are supported here.  If configure is doing its job and the same
     122     compiler is used for gmp as for the application then there shouldn't be
     123     any problem, but perhaps gmp.h should have some preprocessor symbols to
     124     say what libgmp can do.  */
     125  
     126  
     127  
     128  /* If a gmp format is the very first thing or there are two gmp formats with
     129     nothing in between then we'll reach here with this_fmt == last_fmt and we
     130     can do nothing in that case.
     131  
     132     last_ap is always replaced after a FLUSH, so it doesn't matter if va_list
     133     is a call-by-reference and the funs->format routine modifies it.  */
     134  
     135  #define FLUSH()                                         \
     136    do {                                                  \
     137      if (this_fmt == last_fmt)                           \
     138        {                                                 \
     139  	TRACE (printf ("nothing to flush\n"));          \
     140  	ASSERT (va_equal (this_ap, last_ap));           \
     141        }                                                 \
     142      else                                                \
     143        {                                                 \
     144  	ASSERT (*this_fmt == '%');                      \
     145  	*this_fmt = '\0';                               \
     146  	TRACE (printf ("flush \"%s\"\n", last_fmt));    \
     147  	DOPRNT_FORMAT (last_fmt, last_ap);              \
     148        }                                                 \
     149    } while (0)
     150  
     151  
     152  /* Parse up the given format string and do the appropriate output using the
     153     given "funs" routines.  The data parameter is passed through to those
     154     routines.  */
     155  
     156  int
     157  __gmp_doprnt (const struct doprnt_funs_t *funs, void *data,
     158  	      const char *orig_fmt, va_list orig_ap)
     159  {
     160    va_list  ap, this_ap, last_ap;
     161    size_t   alloc_fmt_size, orig_fmt_size;
     162    char     *fmt, *alloc_fmt, *last_fmt, *this_fmt, *gmp_str;
     163    int      retval = 0;
     164    int      type, fchar, *value, seen_precision;
     165    struct doprnt_params_t param;
     166  
     167    TRACE (printf ("gmp_doprnt \"%s\"\n", orig_fmt));
     168  
     169    /* Don't modify orig_ap, if va_list is actually an array and hence call by
     170       reference.  It could be argued that it'd be more efficient to leave the
     171       caller to make a copy if it cared, but doing so here is going to be a
     172       very small part of the total work, and we may as well keep applications
     173       out of trouble.  */
     174    va_copy (ap, orig_ap);
     175  
     176    /* The format string is chopped up into pieces to be passed to
     177       funs->format.  Unfortunately that means it has to be copied so each
     178       piece can be null-terminated.  We're not going to be very fast here, so
     179       use __gmp_allocate_func rather than TMP_ALLOC, to avoid overflowing the
     180       stack if a long output string is given.  */
     181    alloc_fmt_size = orig_fmt_size = strlen (orig_fmt) + 1;
     182  #if _LONG_LONG_LIMB
     183    /* for a long long limb we change %Mx to %llx, so could need an extra 1
     184       char for every 3 existing */
     185    alloc_fmt_size += alloc_fmt_size / 3;
     186  #endif
     187    alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
     188    fmt = alloc_fmt;
     189    memcpy (fmt, orig_fmt, orig_fmt_size);
     190  
     191    /* last_fmt and last_ap are just after the last output, and hence where
     192       the next output will begin, when that's done */
     193    last_fmt = fmt;
     194    va_copy (last_ap, ap);
     195  
     196    for (;;)
     197      {
     198        TRACE (printf ("next: \"%s\"\n", fmt));
     199  
     200        fmt = strchr (fmt, '%');
     201        if (fmt == NULL)
     202  	break;
     203  
     204        /* this_fmt and this_ap are the current '%' sequence being considered */
     205        this_fmt = fmt;
     206        va_copy (this_ap, ap);
     207        fmt++; /* skip the '%' */
     208  
     209        TRACE (printf ("considering\n");
     210  	     printf ("  last: \"%s\"\n", last_fmt);
     211  	     printf ("  this: \"%s\"\n", this_fmt));
     212  
     213        type = '\0';
     214        value = &param.width;
     215  
     216        param.base = 10;
     217        param.conv = 0;
     218        param.expfmt = "e%c%02ld";
     219        param.exptimes4 = 0;
     220        param.fill = ' ';
     221        param.justify = DOPRNT_JUSTIFY_RIGHT;
     222        param.prec = 6;
     223        param.showbase = DOPRNT_SHOWBASE_NO;
     224        param.showpoint = 0;
     225        param.showtrailing = 1;
     226        param.sign = '\0';
     227        param.width = 0;
     228        seen_precision = 0;
     229  
     230        /* This loop parses a single % sequence.  "break" from the switch
     231  	 means continue with this %, "goto next" means the conversion
     232  	 character has been seen and a new % should be sought.  */
     233        for (;;)
     234  	{
     235  	  fchar = *fmt++;
     236  	  if (fchar == '\0')
     237  	    break;
     238  
     239  	  switch (fchar) {
     240  
     241  	  case 'a':
     242  	    /* %a behaves like %e, but defaults to all significant digits,
     243  	       and there's no leading zeros on the exponent (which is in
     244  	       fact bit-based) */
     245  	    param.base = 16;
     246  	    param.expfmt = "p%c%ld";
     247  	    goto conv_a;
     248  	  case 'A':
     249  	    param.base = -16;
     250  	    param.expfmt = "P%c%ld";
     251  	  conv_a:
     252  	    param.conv = DOPRNT_CONV_SCIENTIFIC;
     253  	    param.exptimes4 = 1;
     254  	    if (! seen_precision)
     255  	      param.prec = -1;  /* default to all digits */
     256  	    param.showbase = DOPRNT_SHOWBASE_YES;
     257  	    param.showtrailing = 1;
     258  	    goto floating_a;
     259  
     260  	  case 'c':
     261  	    /* Let's assume wchar_t will be promoted to "int" in the call,
     262  	       the same as char will be. */
     263  	    (void) va_arg (ap, int);
     264  	    goto next;
     265  
     266  	  case 'd':
     267  	  case 'i':
     268  	  case 'u':
     269  	  integer:
     270  	    TRACE (printf ("integer, base=%d\n", param.base));
     271  	    if (! seen_precision)
     272  	      param.prec = -1;
     273  	    switch (type) {
     274  	    case 'j':
     275  	      /* Let's assume uintmax_t is the same size as intmax_t. */
     276  #if HAVE_INTMAX_T
     277  	      (void) va_arg (ap, intmax_t);
     278  #else
     279  	      ASSERT_FAIL (intmax_t not available);
     280  #endif
     281  	      break;
     282  	    case 'l':
     283  	      (void) va_arg (ap, long);
     284  	      break;
     285  	    case 'L':
     286  #if HAVE_LONG_LONG
     287  	      (void) va_arg (ap, long long);
     288  #else
     289  	      ASSERT_FAIL (long long not available);
     290  #endif
     291  	      break;
     292  	    case 'N':
     293  	      {
     294  		mp_ptr     xp;
     295  		mp_size_t  xsize, abs_xsize;
     296  		mpz_t      z;
     297  		FLUSH ();
     298  		xp = va_arg (ap, mp_ptr);
     299  		PTR(z) = xp;
     300  		xsize = (int) va_arg (ap, mp_size_t);
     301  		abs_xsize = ABS (xsize);
     302  		MPN_NORMALIZE (xp, abs_xsize);
     303  		SIZ(z) = (xsize >= 0 ? abs_xsize : -abs_xsize);
     304  		ASSERT_CODE (ALLOC(z) = abs_xsize);
     305  		gmp_str = mpz_get_str (NULL, param.base, z);
     306  		goto gmp_integer;
     307  	      }
     308  	      /* break; */
     309  	    case 'q':
     310  	      /* quad_t is probably the same as long long, but let's treat
     311  		 it separately just to be sure.  Also let's assume u_quad_t
     312  		 will be the same size as quad_t.  */
     313  #if HAVE_QUAD_T
     314  	      (void) va_arg (ap, quad_t);
     315  #else
     316  	      ASSERT_FAIL (quad_t not available);
     317  #endif
     318  	      break;
     319  	    case 'Q':
     320  	      FLUSH ();
     321  	      gmp_str = mpq_get_str (NULL, param.base, va_arg(ap, mpq_srcptr));
     322  	      goto gmp_integer;
     323  	    case 't':
     324  #if HAVE_PTRDIFF_T
     325  	      (void) va_arg (ap, ptrdiff_t);
     326  #else
     327  	      ASSERT_FAIL (ptrdiff_t not available);
     328  #endif
     329  	      break;
     330  	    case 'z':
     331  	      (void) va_arg (ap, size_t);
     332  	      break;
     333  	    case 'Z':
     334  	      {
     335  		int   ret;
     336  		FLUSH ();
     337  		gmp_str = mpz_get_str (NULL, param.base,
     338  				       va_arg (ap, mpz_srcptr));
     339  	      gmp_integer:
     340  		ret = __gmp_doprnt_integer (funs, data, &param, gmp_str);
     341  		 __GMP_FREE_FUNC_TYPE (gmp_str, strlen(gmp_str)+1, char);
     342  		DOPRNT_ACCUMULATE (ret);
     343  		va_copy (last_ap, ap);
     344  		last_fmt = fmt;
     345  	      }
     346  	      break;
     347  	    default:
     348  	      /* default is an "int", and this includes h=short and hh=char
     349  		 since they're promoted to int in a function call */
     350  	      (void) va_arg (ap, int);
     351  	      break;
     352  	    }
     353  	    goto next;
     354  
     355  	  case 'E':
     356  	    param.base = -10;
     357  	    param.expfmt = "E%c%02ld";
     358  	    /*FALLTHRU*/
     359  	  case 'e':
     360  	    param.conv = DOPRNT_CONV_SCIENTIFIC;
     361  	  floating:
     362  	    if (param.showbase == DOPRNT_SHOWBASE_NONZERO)
     363  	      {
     364  		/* # in %e, %f and %g */
     365  		param.showpoint = 1;
     366  		param.showtrailing = 1;
     367  	      }
     368  	  floating_a:
     369  	    switch (type) {
     370  	    case 'F':
     371  	      FLUSH ();
     372  	      DOPRNT_ACCUMULATE (__gmp_doprnt_mpf (funs, data, &param,
     373  						   GMP_DECIMAL_POINT,
     374  						   va_arg (ap, mpf_srcptr)));
     375  	      va_copy (last_ap, ap);
     376  	      last_fmt = fmt;
     377  	      break;
     378  	    case 'L':
     379  #if HAVE_LONG_DOUBLE
     380  	      (void) va_arg (ap, long double);
     381  #else
     382  	      ASSERT_FAIL (long double not available);
     383  #endif
     384  	      break;
     385  	    default:
     386  	      (void) va_arg (ap, double);
     387  	      break;
     388  	    }
     389  	    goto next;
     390  
     391  	  case 'f':
     392  	    param.conv = DOPRNT_CONV_FIXED;
     393  	    goto floating;
     394  
     395  	  case 'F': /* mpf_t     */
     396  	  case 'j': /* intmax_t  */
     397  	  case 'L': /* long long */
     398  	  case 'N': /* mpn       */
     399  	  case 'q': /* quad_t    */
     400  	  case 'Q': /* mpq_t     */
     401  	  case 't': /* ptrdiff_t */
     402  	  case 'z': /* size_t    */
     403  	  case 'Z': /* mpz_t     */
     404  	  set_type:
     405  	    type = fchar;
     406  	    break;
     407  
     408  	  case 'G':
     409  	    param.base = -10;
     410  	    param.expfmt = "E%c%02ld";
     411  	    /*FALLTHRU*/
     412  	  case 'g':
     413  	    param.conv = DOPRNT_CONV_GENERAL;
     414  	    param.showtrailing = 0;
     415  	    goto floating;
     416  
     417  	  case 'h':
     418  	    if (type != 'h')
     419  	      goto set_type;
     420  	    type = 'H';   /* internal code for "hh" */
     421  	    break;
     422  
     423  	  case 'l':
     424  	    if (type != 'l')
     425  	      goto set_type;
     426  	    type = 'L';   /* "ll" means "L" */
     427  	    break;
     428  
     429  	  case 'm':
     430  	    /* glibc strerror(errno), no argument */
     431  	    goto next;
     432  
     433  	  case 'M': /* mp_limb_t */
     434  	    /* mung format string to l or ll and let plain printf handle it */
     435  #if _LONG_LONG_LIMB
     436  	    memmove (fmt+1, fmt, strlen (fmt)+1);
     437  	    fmt[-1] = 'l';
     438  	    fmt[0] = 'l';
     439  	    fmt++;
     440  	    type = 'L';
     441  #else
     442  	    fmt[-1] = 'l';
     443  	    type = 'l';
     444  #endif
     445  	    break;
     446  
     447  	  case 'n':
     448  	    {
     449  	      void  *p;
     450  	      FLUSH ();
     451  	      p = va_arg (ap, void *);
     452  	      switch (type) {
     453  	      case '\0': * (int       *) p = retval; break;
     454  	      case 'F':  mpf_set_si ((mpf_ptr) p, (long) retval); break;
     455  	      case 'H':  * (char      *) p = retval; break;
     456  	      case 'h':  * (short     *) p = retval; break;
     457  #if HAVE_INTMAX_T
     458  	      case 'j':  * (intmax_t  *) p = retval; break;
     459  #else
     460  	      case 'j':  ASSERT_FAIL (intmax_t not available); break;
     461  #endif
     462  	      case 'l':  * (long      *) p = retval; break;
     463  #if HAVE_QUAD_T && HAVE_LONG_LONG
     464  	      case 'q':
     465  		ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
     466  		/*FALLTHRU*/
     467  #else
     468  	      case 'q':  ASSERT_FAIL (quad_t not available); break;
     469  #endif
     470  #if HAVE_LONG_LONG
     471  	      case 'L':  * (long long *) p = retval; break;
     472  #else
     473  	      case 'L':  ASSERT_FAIL (long long not available); break;
     474  #endif
     475  	      case 'N':
     476  		{
     477  		  mp_size_t  n;
     478  		  n = va_arg (ap, mp_size_t);
     479  		  n = ABS (n);
     480  		  if (n != 0)
     481  		    {
     482  		      * (mp_ptr) p = retval;
     483  		      MPN_ZERO ((mp_ptr) p + 1, n - 1);
     484  		    }
     485  		}
     486  		break;
     487  	      case 'Q':  mpq_set_si ((mpq_ptr) p, (long) retval, 1L); break;
     488  #if HAVE_PTRDIFF_T
     489  	      case 't':  * (ptrdiff_t *) p = retval; break;
     490  #else
     491  	      case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
     492  #endif
     493  	      case 'z':  * (size_t    *) p = retval; break;
     494  	      case 'Z':  mpz_set_si ((mpz_ptr) p, (long) retval); break;
     495  	      }
     496  	    }
     497  	    va_copy (last_ap, ap);
     498  	    last_fmt = fmt;
     499  	    goto next;
     500  
     501  	  case 'o':
     502  	    param.base = 8;
     503  	    goto integer;
     504  
     505  	  case 'p':
     506  	  case 's':
     507  	    /* "void *" will be good enough for "char *" or "wchar_t *", no
     508  	       need for separate code.  */
     509  	    (void) va_arg (ap, const void *);
     510  	    goto next;
     511  
     512  	  case 'x':
     513  	    param.base = 16;
     514  	    goto integer;
     515  	  case 'X':
     516  	    param.base = -16;
     517  	    goto integer;
     518  
     519  	  case '%':
     520  	    goto next;
     521  
     522  	  case '#':
     523  	    param.showbase = DOPRNT_SHOWBASE_NONZERO;
     524  	    break;
     525  
     526  	  case '\'':
     527  	    /* glibc digit grouping, just pass it through, no support for it
     528  	       on gmp types */
     529  	    break;
     530  
     531  	  case '+':
     532  	  case ' ':
     533  	    param.sign = fchar;
     534  	    break;
     535  
     536  	  case '-':
     537  	    param.justify = DOPRNT_JUSTIFY_LEFT;
     538  	    break;
     539  	  case '.':
     540  	    seen_precision = 1;
     541  	    param.prec = -1; /* "." alone means all necessary digits */
     542  	    value = &param.prec;
     543  	    break;
     544  
     545  	  case '*':
     546  	    {
     547  	      int n = va_arg (ap, int);
     548  
     549  	      if (value == &param.width)
     550  		{
     551  		  /* negative width means left justify */
     552  		  if (n < 0)
     553  		    {
     554  		      param.justify = DOPRNT_JUSTIFY_LEFT;
     555  		      n = -n;
     556  		    }
     557  		  param.width = n;
     558  		}
     559  	      else
     560  		{
     561  		  /* don't allow negative precision */
     562  		  param.prec = MAX (0, n);
     563  		}
     564  	    }
     565  	    break;
     566  
     567  	  case '0':
     568  	    if (value == &param.width)
     569  	      {
     570  		/* in width field, set fill */
     571  		param.fill = '0';
     572  
     573  		/* for right justify, put the fill after any minus sign */
     574  		if (param.justify == DOPRNT_JUSTIFY_RIGHT)
     575  		  param.justify = DOPRNT_JUSTIFY_INTERNAL;
     576  	      }
     577  	    else
     578  	      {
     579  		/* in precision field, set value */
     580  		*value = 0;
     581  	      }
     582  	    break;
     583  
     584  	  case '1': case '2': case '3': case '4': case '5':
     585  	  case '6': case '7': case '8': case '9':
     586  	    /* process all digits to form a value */
     587  	    {
     588  	      int  n = 0;
     589  	      do {
     590  		n = n * 10 + (fchar-'0');
     591  		fchar = *fmt++;
     592  	      } while (isascii (fchar) && isdigit (fchar));
     593  	      fmt--; /* unget the non-digit */
     594  	      *value = n;
     595  	    }
     596  	    break;
     597  
     598  	  default:
     599  	    /* something invalid */
     600  	    ASSERT (0);
     601  	    goto next;
     602  	  }
     603  	}
     604  
     605      next:
     606        /* Stop parsing the current "%" format, look for a new one. */
     607        ;
     608      }
     609  
     610    TRACE (printf ("remainder: \"%s\"\n", last_fmt));
     611    if (*last_fmt != '\0')
     612      DOPRNT_FORMAT (last_fmt, last_ap);
     613  
     614    if (funs->final != NULL)
     615      if ((*funs->final) (data) == -1)
     616        goto error;
     617  
     618   done:
     619    __GMP_FREE_FUNC_TYPE (alloc_fmt, alloc_fmt_size, char);
     620    return retval;
     621  
     622   error:
     623    retval = -1;
     624    goto done;
     625  }