1  /* mpfr_strtofr -- set a floating-point number from a string
       2  
       3  Copyright 2004-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #include <ctype.h>  /* For isspace */
      24  
      25  #define MPFR_NEED_LONGLONG_H
      26  #include "mpfr-impl.h"
      27  
      28  #define MPFR_MAX_BASE 62
      29  
      30  struct parsed_string {
      31    int            negative; /* non-zero iff the number is negative */
      32    int            base;     /* base of the string */
      33    unsigned char *mantissa; /* raw significand (without any point) */
      34    unsigned char *mant;     /* stripped significand (without starting and
      35                                ending zeroes). This points inside the area
      36                                allocated for the mantissa field. */
      37    size_t         prec;     /* length of mant (zero for +/-0) */
      38    size_t         alloc;    /* allocation size of mantissa */
      39    mpfr_exp_t     exp_base; /* number of digits before the point, + exponent
      40                                except in case of binary exponent (exp_bin) */
      41    mpfr_exp_t     exp_bin;  /* binary exponent of the pxxx format for
      42                                base = 2 or 16 */
      43  };
      44  
      45  /* This table has been generated by the following program.
      46     For 2 <= b <= MPFR_MAX_BASE,
      47     RedInvLog2Table[b-2][0] / RedInvLog2Table[b-2][1]
      48     is an upper approximation to log(2)/log(b), no larger than 1.
      49     Note: these numbers must fit on 16 bits, thus unsigned int is OK.
      50  */
      51  static const unsigned int RedInvLog2Table[MPFR_MAX_BASE-1][2] = {
      52    {1, 1},
      53    {53, 84},
      54    {1, 2},
      55    {4004, 9297},
      56    {53, 137},
      57    {2393, 6718},
      58    {1, 3},
      59    {665, 2108},
      60    {4004, 13301},
      61    {949, 3283},
      62    {53, 190},
      63    {5231, 19357},
      64    {2393, 9111},
      65    {247, 965},
      66    {1, 4},
      67    {4036, 16497},
      68    {665, 2773},
      69    {5187, 22034},
      70    {4004, 17305},
      71    {51, 224},
      72    {949, 4232},
      73    {3077, 13919},
      74    {53, 243},
      75    {73, 339},
      76    {5231, 24588},
      77    {665, 3162},
      78    {2393, 11504},
      79    {4943, 24013},
      80    {247, 1212},
      81    {3515, 17414},
      82    {1, 5},
      83    {4415, 22271},
      84    {4036, 20533},
      85    {263, 1349},
      86    {665, 3438},
      87    {1079, 5621},
      88    {5187, 27221},
      89    {2288, 12093},
      90    {4004, 21309},
      91    {179, 959},
      92    {51, 275},
      93    {495, 2686},
      94    {949, 5181},
      95    {3621, 19886},
      96    {3077, 16996},
      97    {229, 1272},
      98    {53, 296},
      99    {109, 612},
     100    {73, 412},
     101    {1505, 8537},
     102    {5231, 29819},
     103    {283, 1621},
     104    {665, 3827},
     105    {32, 185},
     106    {2393, 13897},
     107    {1879, 10960},
     108    {4943, 28956},
     109    {409, 2406},
     110    {247, 1459},
     111    {231, 1370},
     112    {3515, 20929} };
     113  #if 0
     114  #define N 8
     115  int main ()
     116  {
     117    unsigned long tab[N];
     118    int i, n, base;
     119    mpfr_t x, y;
     120    mpq_t q1, q2;
     121    int overflow = 0, base_overflow;
     122  
     123    mpfr_init2 (x, 200);
     124    mpfr_init2 (y, 200);
     125    mpq_init (q1);
     126    mpq_init (q2);
     127  
     128    for (base = 2 ; base < 63 ; base ++)
     129      {
     130        mpfr_set_ui (x, base, MPFR_RNDN);
     131        mpfr_log2 (x, x, MPFR_RNDN);
     132        mpfr_ui_div (x, 1, x, MPFR_RNDN);
     133        printf ("Base: %d x=%e ", base, mpfr_get_d1 (x));
     134        for (i = 0 ; i < N ; i++)
     135          {
     136            mpfr_floor (y, x);
     137            tab[i] = mpfr_get_ui (y, MPFR_RNDN);
     138            mpfr_sub (x, x, y, MPFR_RNDN);
     139            mpfr_ui_div (x, 1, x, MPFR_RNDN);
     140          }
     141        for (i = N-1 ; i >= 0 ; i--)
     142          if (tab[i] != 0)
     143            break;
     144        mpq_set_ui (q1, tab[i], 1);
     145        for (i = i-1 ; i >= 0 ; i--)
     146            {
     147              mpq_inv (q1, q1);
     148              mpq_set_ui (q2, tab[i], 1);
     149              mpq_add (q1, q1, q2);
     150            }
     151        printf("Approx: ", base);
     152        mpq_out_str (stdout, 10, q1);
     153        printf (" = %e\n", mpq_get_d (q1) );
     154        fprintf (stderr, "{");
     155        mpz_out_str (stderr, 10, mpq_numref (q1));
     156        fprintf (stderr, "UL, ");
     157        mpz_out_str (stderr, 10, mpq_denref (q1));
     158        fprintf (stderr, "UL},\n");
     159        if (mpz_cmp_ui (mpq_numref (q1), 1<<16-1) >= 0
     160            || mpz_cmp_ui (mpq_denref (q1), 1<<16-1) >= 0)
     161          overflow = 1, base_overflow = base;
     162      }
     163  
     164    mpq_clear (q2);
     165    mpq_clear (q1);
     166    mpfr_clear (y);
     167    mpfr_clear (x);
     168    if (overflow )
     169      printf ("OVERFLOW for base =%d!\n", base_overflow);
     170  }
     171  #endif
     172  
     173  
     174  /* Compatible with any locale, but one still assumes that 'a', 'b', 'c',
     175     ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like
     176     in any ASCII-based character set). */
     177  static int
     178  digit_value_in_base (int c, int base)
     179  {
     180    int digit;
     181  
     182    MPFR_ASSERTD (base > 0 && base <= MPFR_MAX_BASE);
     183  
     184    if (c >= '0' && c <= '9')
     185      digit = c - '0';
     186    else if (c >= 'a' && c <= 'z')
     187      digit = (base >= 37) ? c - 'a' + 36 : c - 'a' + 10;
     188    else if (c >= 'A' && c <= 'Z')
     189      digit = c - 'A' + 10;
     190    else
     191      return -1;
     192  
     193    return MPFR_LIKELY (digit < base) ? digit : -1;
     194  }
     195  
     196  /* Compatible with any locale, but one still assumes that 'a', 'b', 'c',
     197     ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like
     198     in any ASCII-based character set). */
     199  /* TODO: support EBCDIC. */
     200  static int
     201  fast_casecmp (const char *s1, const char *s2)
     202  {
     203    unsigned char c1, c2;
     204  
     205    do
     206      {
     207        c2 = *(const unsigned char *) s2++;
     208        if (c2 == '\0')
     209          return 0;
     210        c1 = *(const unsigned char *) s1++;
     211        if (c1 >= 'A' && c1 <= 'Z')
     212          c1 = c1 - 'A' + 'a';
     213      }
     214    while (c1 == c2);
     215    return 1;
     216  }
     217  
     218  /* Parse a string and fill pstr.
     219     Return the advanced ptr too.
     220     It returns:
     221        -1 if invalid string,
     222        0 if special string (like nan),
     223        1 if the string is OK.
     224        2 if overflows
     225     So it doesn't return the ternary value
     226     BUT if it returns 0 (NAN or INF), the ternary value is also '0'
     227     (ie NAN and INF are exact) */
     228  static int
     229  parse_string (mpfr_ptr x, struct parsed_string *pstr,
     230                const char **string, int base)
     231  {
     232    const char *str = *string;
     233    unsigned char *mant;
     234    int point;
     235    int res = -1;  /* Invalid input return value */
     236    const char *prefix_str;
     237    int decimal_point;
     238  
     239    decimal_point = (unsigned char) MPFR_DECIMAL_POINT;
     240  
     241    /* Init variable */
     242    pstr->mantissa = NULL;
     243  
     244    /* Optional leading whitespace */
     245    /* For non-"C" locales, the ISO C standard allows isspace(0) to
     246       return true. So we need to stop explicitly on '\0'. */
     247    while (*str != '\0' && isspace ((unsigned char) *str))
     248      str++;
     249  
     250    /* An optional sign `+' or `-' */
     251    pstr->negative = (*str == '-');
     252    if (*str == '-' || *str == '+')
     253      str++;
     254  
     255    /* Can be case-insensitive NAN */
     256    if (fast_casecmp (str, "@nan@") == 0)
     257      {
     258        str += 5;
     259        goto set_nan;
     260      }
     261    if (base <= 16 && fast_casecmp (str, "nan") == 0)
     262      {
     263        str += 3;
     264      set_nan:
     265        /* Check for "(dummychars)" */
     266        if (*str == '(')
     267          {
     268            const char *s;
     269            for (s = str+1 ; *s != ')' ; s++)
     270              if (!(*s >= 'A' && *s <= 'Z')
     271                  && !(*s >= 'a' && *s <= 'z')
     272                  && !(*s >= '0' && *s <= '9')
     273                  && *s != '_')
     274                break;
     275            if (*s == ')')
     276              str = s+1;
     277          }
     278        *string = str;
     279        MPFR_SET_NAN(x);
     280        /* MPFR_RET_NAN not used as the return value isn't a ternary value */
     281        __gmpfr_flags |= MPFR_FLAGS_NAN;
     282        return 0;
     283      }
     284  
     285    /* Can be case-insensitive INF */
     286    if (fast_casecmp (str, "@inf@") == 0)
     287      {
     288        str += 5;
     289        goto set_inf;
     290      }
     291    if (base <= 16 && fast_casecmp (str, "infinity") == 0)
     292      {
     293        str += 8;
     294        goto set_inf;
     295      }
     296    if (base <= 16 && fast_casecmp (str, "inf") == 0)
     297      {
     298        str += 3;
     299      set_inf:
     300        *string = str;
     301        MPFR_SET_INF (x);
     302        (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x);
     303        return 0;
     304      }
     305  
     306    /* If base=0 or 16, it may include '0x' prefix */
     307    prefix_str = NULL;
     308    if ((base == 0 || base == 16) && str[0]=='0'
     309        && (str[1]=='x' || str[1] == 'X'))
     310      {
     311        prefix_str = str;
     312        base = 16;
     313        str += 2;
     314      }
     315    /* If base=0 or 2, it may include '0b' prefix */
     316    if ((base == 0 || base == 2) && str[0]=='0'
     317        && (str[1]=='b' || str[1] == 'B'))
     318      {
     319        prefix_str = str;
     320        base = 2;
     321        str += 2;
     322      }
     323    /* Else if base=0, we assume decimal base */
     324    if (base == 0)
     325      base = 10;
     326    pstr->base = base;
     327  
     328    /* Alloc mantissa */
     329    pstr->alloc = (size_t) strlen (str) + 1;
     330    pstr->mantissa = (unsigned char*) mpfr_allocate_func (pstr->alloc);
     331  
     332    /* Read mantissa digits */
     333   parse_begin:
     334    mant = pstr->mantissa;
     335    point = 0;
     336    pstr->exp_base = 0;
     337    pstr->exp_bin  = 0;
     338  
     339    for (;;) /* Loop until an invalid character is read */
     340      {
     341        int c = (unsigned char) *str++;
     342        /* The cast to unsigned char is needed because of digit_value_in_base;
     343           decimal_point uses this convention too. */
     344        if (c == '.' || c == decimal_point)
     345          {
     346            if (MPFR_UNLIKELY(point)) /* Second '.': stop parsing */
     347              break;
     348            point = 1;
     349            continue;
     350          }
     351        c = digit_value_in_base (c, base);
     352        if (c == -1)
     353          break;
     354        MPFR_ASSERTN (c >= 0); /* c is representable in an unsigned char */
     355        *mant++ = (unsigned char) c;
     356        if (!point)
     357          pstr->exp_base ++;
     358      }
     359    str--; /* The last read character was invalid */
     360  
     361    /* Update the # of char in the mantissa */
     362    pstr->prec = mant - pstr->mantissa;
     363    /* Check if there are no characters in the mantissa (Invalid argument) */
     364    if (pstr->prec == 0)
     365      {
     366        /* Check if there was a prefix (in such a case, we have to read
     367           again the mantissa without skipping the prefix)
     368           The allocated mantissa is still big enough since we will
     369           read only 0, and we alloc one more char than needed.
     370           FIXME: Not really friendly. Maybe cleaner code? */
     371        if (prefix_str != NULL)
     372          {
     373            str = prefix_str;
     374            prefix_str = NULL;
     375            goto parse_begin;
     376          }
     377        goto end;
     378      }
     379  
     380    /* Valid entry */
     381    res = 1;
     382    MPFR_ASSERTD (pstr->exp_base >= 0);
     383  
     384    /* FIXME: In the code below (both cases), if the exponent from the
     385       string is large, it will be replaced by MPFR_EXP_MIN or MPFR_EXP_MAX,
     386       i.e. it will have a different value. This may not change the result
     387       in most cases, but there is no guarantee on very long strings when
     388       mpfr_exp_t is a 32-bit type, as the exponent could be brought back
     389       to the current exponent range. */
     390  
     391    /* an optional exponent (e or E, p or P, @) */
     392    if ( (*str == '@' || (base <= 10 && (*str == 'e' || *str == 'E')))
     393         && (!isspace((unsigned char) str[1])) )
     394      {
     395        char *endptr;
     396        /* the exponent digits are kept in ASCII */
     397        mpfr_exp_t sum;
     398        long read_exp = strtol (str + 1, &endptr, 10);
     399        if (endptr != str+1)
     400          str = endptr;
     401        sum =
     402          read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) :
     403          read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) :
     404          (mpfr_exp_t) read_exp;
     405        MPFR_SADD_OVERFLOW (sum, sum, pstr->exp_base,
     406                            mpfr_exp_t, mpfr_uexp_t,
     407                            MPFR_EXP_MIN, MPFR_EXP_MAX,
     408                            res = 2, res = 3);
     409        /* Since exp_base was positive, read_exp + exp_base can't
     410           do a negative overflow. */
     411        MPFR_ASSERTD (res != 3);
     412        pstr->exp_base = sum;
     413      }
     414    else if ((base == 2 || base == 16)
     415             && (*str == 'p' || *str == 'P')
     416             && (!isspace((unsigned char) str[1])))
     417      {
     418        char *endptr;
     419        long read_exp = strtol (str + 1, &endptr, 10);
     420        if (endptr != str+1)
     421          str = endptr;
     422        pstr->exp_bin =
     423          read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) :
     424          read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) :
     425          (mpfr_exp_t) read_exp;
     426      }
     427  
     428    /* Remove 0's at the beginning and end of mantissa[0..prec-1] */
     429    mant = pstr->mantissa;
     430    for ( ; (pstr->prec > 0) && (*mant == 0) ; mant++, pstr->prec--)
     431      if (MPFR_LIKELY (pstr->exp_base != MPFR_EXP_MIN))
     432        pstr->exp_base--;
     433    for ( ; (pstr->prec > 0) && (mant[pstr->prec - 1] == 0); pstr->prec--);
     434    pstr->mant = mant;
     435  
     436    /* Check if x = 0 */
     437    if (pstr->prec == 0)
     438      {
     439        MPFR_SET_ZERO (x);
     440        if (pstr->negative)
     441          MPFR_SET_NEG(x);
     442        else
     443          MPFR_SET_POS(x);
     444        res = 0;
     445      }
     446  
     447    *string = str;
     448   end:
     449    if (pstr->mantissa != NULL && res != 1)
     450      mpfr_free_func (pstr->mantissa, pstr->alloc);
     451    return res;
     452  }
     453  
     454  /* Transform a parsed string to a mpfr_t according to the rounding mode
     455     and the precision of x.
     456     Returns the ternary value. */
     457  static int
     458  parsed_string_to_mpfr (mpfr_ptr x, struct parsed_string *pstr, mpfr_rnd_t rnd)
     459  {
     460    mpfr_prec_t precx, prec, ysize_bits, pstr_size;
     461    mpfr_exp_t exp;
     462    mp_limb_t *result;
     463    int count, exact;
     464    mp_size_t ysize, real_ysize, diff_ysize;
     465    int res, err;
     466    const int extra_limbs = GMP_NUMB_BITS >= 12 ? 1 : 2; /* see below */
     467    MPFR_ZIV_DECL (loop);
     468    MPFR_TMP_DECL (marker);
     469  
     470    /* initialize the working precision */
     471    precx = MPFR_GET_PREC (x);
     472    prec = precx + MPFR_INT_CEIL_LOG2 (precx);
     473  
     474    /* Compute the value y of the leading characters as long as rounding is not
     475       possible.
     476       Note: We have some integer overflow checking using MPFR_EXP_MIN and
     477       MPFR_EXP_MAX in this loop. Thanks to the large margin between these
     478       extremal values of the mpfr_exp_t type and the valid minimum/maximum
     479       exponents, such integer overflows would correspond to real underflow
     480       or overflow on the result (possibly except in huge precisions, which
     481       are disregarded here; anyway, in practice, such issues could occur
     482       only with 32-bit precision and exponent types). Such checks could be
     483       extended to real early underflow/overflow checking, in order to avoid
     484       useless computations in such cases; in such a case, be careful that
     485       the approximation errors need to be taken into account. */
     486    MPFR_TMP_MARK(marker);
     487    MPFR_ZIV_INIT (loop, prec);
     488    for (;;)
     489      {
     490        mp_limb_t *y0, *y;
     491  
     492        /* y will be regarded as a number with precision prec. */
     493        ysize = MPFR_PREC2LIMBS (prec);
     494        /* prec bits corresponds to ysize limbs */
     495        ysize_bits = (mpfr_prec_t) ysize * GMP_NUMB_BITS;
     496        MPFR_ASSERTD (ysize_bits >= prec);
     497        /* and to ysize_bits >= prec > precx bits. */
     498        /* We need to allocate one more limb as specified by mpn_set_str
     499           (a limb may be written in rp[rn]). Note that the manual of GMP
     500           up to 5.1.3 was incorrect on this point.
     501           See the following discussion:
     502           https://gmplib.org/list-archives/gmp-bugs/2013-December/003267.html */
     503        y0 = MPFR_TMP_LIMBS_ALLOC (2 * ysize + extra_limbs + 1);
     504        y = y0 + ysize; /* y has (ysize + extra_limbs + 1) allocated limbs */
     505  
     506        /* pstr_size is the number of bytes we want to read from pstr->mant
     507           to fill at least ysize full limbs with mpn_set_str.
     508           We must have base^(pstr_size-1) >= (2^(GMP_NUMB_BITS))^ysize
     509           (in the worst case, the first digit is one and all others are zero).
     510           i.e., pstr_size >= 1 + ysize*GMP_NUMB_BITS/log2(base)
     511            Since ysize ~ prec/GMP_NUMB_BITS and prec < Umax/2 =>
     512            ysize*GMP_NUMB_BITS can not overflow.
     513           We compute pstr_size = 1 + ceil(ysize_bits * Num / Den)
     514            where 1/log2(base) <= Num/Den <= 1
     515           It is not exactly ceil(1/log2(base)) but could be one more (base 2).
     516           Quite ugly since it tries to avoid overflow:
     517           let Num = RedInvLog2Table[pstr->base-2][0]
     518           and Den = RedInvLog2Table[pstr->base-2][1],
     519           and ysize_bits = a*Den+b,
     520           then ysize_bits * Num/Den = a*Num + (b * Num)/Den,
     521           thus ceil(ysize_bits * Num/Den) = a*Num + floor(b * Num + Den - 1)/Den
     522  
     523           Note: denoting m = pstr_size and n = ysize_bits, assuming we have
     524           m = 1 + ceil(n/log2(b)), i.e., b^(m-1) >= 2^n > b^(m-2), then
     525           b^(m-1)/2^n < b, and since we consider m characters of the input,
     526           the corresponding part is less than b^m < b^2*2^n.
     527           This implies that if b^2 < 2^GMP_NUMB_BITS, which for b <= 62 holds
     528           for GMP_NUMB_BITS >= 12, we have real_ysize <= ysize+1 below
     529           (this also implies that for GMP_NUMB_BITS >= 13, the number of bits
     530           of y[real_ysize-1] below is less than GMP_NUMB_BITS, thus
     531           count < GMP_NUMB_BITS).
     532           Warning: for GMP_NUMB_BITS=8, we can have real_ysize = ysize + 2!
     533           Hence the allocation above for ysize + extra_limbs limbs.
     534        */
     535        {
     536          unsigned int Num = RedInvLog2Table[pstr->base-2][0];
     537          unsigned int Den = RedInvLog2Table[pstr->base-2][1];
     538          MPFR_ASSERTD (Num <= Den && Den <= 65535); /* thus no overflow */
     539          pstr_size = (ysize_bits / Den) * Num
     540            + ((unsigned long) (ysize_bits % Den) * Num + Den - 1) / Den
     541            + 1;
     542        }
     543  
     544        /* Since pstr_size corresponds to at least ysize_bits bits,
     545           and ysize_bits >= prec, the weight of the neglected part of
     546           pstr->mant (if any) is < ulp(y) < ulp(x). */
     547  
     548        /* If the number of wanted bytes is more than what is available
     549           in pstr->mant, i.e. pstr->prec, reduce it to pstr->prec. */
     550        if (pstr_size > pstr->prec)
     551          pstr_size = pstr->prec;
     552  
     553        /* Convert str (potentially truncated to pstr_size) into binary.
     554           Note that pstr->mant is big endian, thus no offset is needed. */
     555        real_ysize = mpn_set_str (y, pstr->mant, pstr_size, pstr->base);
     556  
     557        /* See above for the explanation of the following assertion. */
     558        MPFR_ASSERTD (real_ysize <= ysize + extra_limbs);
     559  
     560        /* The Boolean "exact" will attempt to track exactness of the result:
     561           If it is true, then this means that the result is exact, allowing
     562           termination, even though the rounding test may not succeed.
     563           Conversely, if the result is exact, then "exact" will not
     564           necessarily be true at the end of the Ziv loop, but we will need
     565           to make sure that at some point, "exact" will be true in order to
     566           guarantee termination. FIXME: check that. */
     567        /* First, consider the part of the input string that has been ignored.
     568           Note that the trailing zeros have been removed in parse_string, so
     569           that if something has been ignored, it must be non-zero. */
     570        exact = pstr_size == pstr->prec;
     571  
     572        /* Normalize y and set the initial value of its exponent exp, which
     573           is 0 when y is not shifted.
     574           Since pstr->mant was normalized, mpn_set_str guarantees that
     575           the most significant limb is non-zero. */
     576        MPFR_ASSERTD (y[real_ysize - 1] != 0); /* mpn_set_str guarantees this */
     577        count_leading_zeros (count, y[real_ysize - 1]);
     578        diff_ysize = ysize - real_ysize;
     579        MPFR_LOG_MSG (("diff_ysize = %ld\n", (long) diff_ysize));
     580        if (diff_ysize >= 0)
     581          {
     582            /* We have enough limbs to store {y, real_ysize} exactly
     583               in {y, ysize}, so that we can do a left shift, without
     584               losing any information ("exact" will not change). */
     585            if (count != 0)
     586              mpn_lshift (y + diff_ysize, y, real_ysize, count);
     587            if (diff_ysize > 0)
     588              {
     589                if (count == 0)
     590                  mpn_copyd (y + diff_ysize, y, real_ysize);
     591                MPN_ZERO (y, diff_ysize);
     592              }
     593            /* exp = negation of the total shift count, avoiding overflows. */
     594            exp = - ((mpfr_exp_t) diff_ysize * GMP_NUMB_BITS + count);
     595          }
     596        else
     597          {
     598            /* Shift {y, real_ysize} for (GMP_NUMB_BITS - count) bits to the
     599               right, and put the ysize most significant limbs into {y, ysize}.
     600               We have either real_ysize = ysize + 1 or real_ysize = ysize + 2
     601               (only possible with extra_limbs == 2). */
     602            MPFR_ASSERTD (diff_ysize == -1 ||
     603                          (extra_limbs == 2 && diff_ysize == -2));
     604            if (count != 0)
     605              {
     606                /* Before doing the shift, consider the limb that will entirely
     607                   be lost if real_ysize = ysize + 2. */
     608                exact = exact && (diff_ysize == -1 || y[0] == MPFR_LIMB_ZERO);
     609                /* mpn_rshift allows overlap, provided destination <= source */
     610                /* FIXME: The bits lost due to mpn_rshift are not taken
     611                   into account in the error analysis below! */
     612                if (mpn_rshift (y, y - (diff_ysize + 1), real_ysize,
     613                                GMP_NUMB_BITS - count) != MPFR_LIMB_ZERO)
     614                  exact = 0; /* some non-zero bits have been shifted out */
     615              }
     616            else
     617              {
     618                /* the case real_ysize = ysize + 2 with count = 0 cannot happen
     619                   even with GMP_NUMB_BITS = 8 since 62^2 < 256^2/2 */
     620                MPFR_ASSERTD (diff_ysize == -1);
     621                exact = exact && y[0] == MPFR_LIMB_ZERO;
     622                /* copy {y+real_ysize-ysize, ysize} to {y, ysize} */
     623                mpn_copyi (y, y + 1, real_ysize - 1);
     624              }
     625            /* exp = shift count */
     626            /* TODO: add some explanations about what exp means exactly. */
     627            exp = GMP_NUMB_BITS * (- diff_ysize) - count;
     628          }
     629  
     630        /* compute base^(exp_base - pstr_size) on n limbs */
     631        if (IS_POW2 (pstr->base))
     632          {
     633            /* Base: 2, 4, 8, 16, 32 */
     634            int pow2;
     635            mpfr_exp_t tmp;
     636  
     637            MPFR_LOG_MSG (("case 1 (base = power of 2)\n", 0));
     638  
     639            count_leading_zeros (pow2, (mp_limb_t) pstr->base);
     640            pow2 = GMP_NUMB_BITS - pow2 - 1; /* base = 2^pow2 */
     641            MPFR_ASSERTD (0 < pow2 && pow2 <= 5);
     642            /* exp += pow2 * (pstr->exp_base - pstr_size) + pstr->exp_bin
     643               with overflow checking
     644               and check that we can add/subtract 2 to exp without overflow */
     645            MPFR_SADD_OVERFLOW (tmp, pstr->exp_base, -(mpfr_exp_t) pstr_size,
     646                                mpfr_exp_t, mpfr_uexp_t,
     647                                MPFR_EXP_MIN, MPFR_EXP_MAX,
     648                                goto overflow, goto underflow);
     649            /* On some FreeBsd/Alpha, LONG_MIN/1 produced an exception
     650               so we used to check for this before doing the division.
     651               Since this bug is closed now (Nov 26, 2009), we remove
     652               that check
     653               <https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=72024> */
     654            if (tmp > 0 && MPFR_EXP_MAX / pow2 <= tmp)
     655              goto overflow;
     656            else if (tmp < 0 && MPFR_EXP_MIN / pow2 >= tmp)
     657              goto underflow;
     658            tmp *= pow2;
     659            MPFR_SADD_OVERFLOW (tmp, tmp, pstr->exp_bin,
     660                                mpfr_exp_t, mpfr_uexp_t,
     661                                MPFR_EXP_MIN, MPFR_EXP_MAX,
     662                                goto overflow, goto underflow);
     663            MPFR_SADD_OVERFLOW (exp, exp, tmp,
     664                                mpfr_exp_t, mpfr_uexp_t,
     665                                MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
     666                                goto overflow, goto underflow);
     667            result = y;
     668            err = 0;
     669          }
     670        /* case non-power-of-two-base, and pstr->exp_base > pstr_size */
     671        else if (pstr->exp_base > (mpfr_exp_t) pstr_size)
     672          {
     673            mp_limb_t *z;
     674            mpfr_exp_t exp_z;
     675  
     676            MPFR_LOG_MSG (("case 2 (exp_base > pstr_size)\n", 0));
     677  
     678            result = MPFR_TMP_LIMBS_ALLOC (2 * ysize + 1);
     679  
     680            /* z = base^(exp_base-sptr_size) using space allocated at y-ysize */
     681            z = y0;
     682            /* NOTE: exp_base-pstr_size can't overflow since pstr_size > 0 */
     683            err = mpfr_mpn_exp (z, &exp_z, pstr->base,
     684                                pstr->exp_base - pstr_size, ysize);
     685            if (err == -2)
     686              goto overflow;
     687            exact = exact && (err == -1);
     688  
     689            /* If exact is non zero, then z equals exactly the value of the
     690               pstr_size most significant digits from pstr->mant, i.e., the
     691               only difference can come from the neglected pstr->prec-pstr_size
     692               least significant digits of pstr->mant.
     693               If exact is zero, then z is rounded toward zero with respect
     694               to that value. */
     695  
     696            /* multiply(y = 0.mant[0]...mant[pr-1])_base by base^(exp-g):
     697               since both y and z are rounded toward zero, so is "result" */
     698            mpn_mul_n (result, y, z, ysize);
     699  
     700            /* compute the error on the product */
     701            if (err == -1)
     702              err = 0;
     703            err ++;
     704  
     705            /* compute the exponent of y */
     706            /* exp += exp_z + ysize_bits with overflow checking
     707               and check that we can add/subtract 2 to exp without overflow */
     708            MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits,
     709                                mpfr_exp_t, mpfr_uexp_t,
     710                                MPFR_EXP_MIN, MPFR_EXP_MAX,
     711                                goto overflow, goto underflow);
     712            MPFR_SADD_OVERFLOW (exp, exp, exp_z,
     713                                mpfr_exp_t, mpfr_uexp_t,
     714                                MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
     715                                goto overflow, goto underflow);
     716  
     717            /* normalize result */
     718            if (MPFR_LIMB_MSB (result[2 * ysize - 1]) == 0)
     719              {
     720                mp_limb_t *r = result + ysize - 1;
     721                mpn_lshift (r, r, ysize + 1, 1);
     722                /* Overflow checking not needed */
     723                exp --;
     724              }
     725  
     726            /* if the low ysize limbs of {result, 2*ysize} are all zero,
     727               then the result is still "exact" (if it was before) */
     728            exact = exact && (mpn_scan1 (result, 0) >= ysize_bits);
     729            result += ysize;
     730          }
     731        /* case exp_base < pstr_size */
     732        else if (pstr->exp_base < (mpfr_exp_t) pstr_size)
     733          {
     734            mp_limb_t *z;
     735            mpfr_exp_t exp_z;
     736  
     737            MPFR_LOG_MSG (("case 3 (exp_base < pstr_size)\n", 0));
     738  
     739            result = MPFR_TMP_LIMBS_ALLOC (3 * ysize + 1);
     740  
     741            /* y0 = y * K^ysize */
     742            MPN_ZERO (y0, ysize);
     743  
     744            /* pstr_size - pstr->exp_base can overflow */
     745            exp_z = pstr->exp_base == MPFR_EXP_MIN ?
     746              MPFR_EXP_MAX : -pstr->exp_base;  /* avoid integer overflow */
     747            MPFR_SADD_OVERFLOW (exp_z, (mpfr_exp_t) pstr_size, exp_z,
     748                                mpfr_exp_t, mpfr_uexp_t,
     749                                MPFR_EXP_MIN, MPFR_EXP_MAX,
     750                                goto underflow, goto overflow);
     751  
     752            /* (z, exp_z) = base^(pstr_size - exp_base) */
     753            z = result + 2*ysize + 1;
     754            err = mpfr_mpn_exp (z, &exp_z, pstr->base, exp_z, ysize);
     755  
     756            /* Now {z, ysize} * 2^(exp_z_out - ysize_bits) is an approximation
     757               to base^exp_z_in (denoted b^e below), rounded toward zero, with:
     758               * if err = -1, the result is exact;
     759               * if err = -2, an overflow occurred in the computation of exp_z;
     760               * otherwise the error is bounded by 2^err ulps.
     761               Thus the exact value of b^e is between z and z + 2^err, where
     762               z is {z, ysize} properly scaled by a power of 2. Then the error
     763               will be:
     764                 y/b^e - trunc(y/z) = eps1 + eps2
     765               with
     766                 eps1 = y/b^e - y/z <= 0
     767                 eps2 = y/z - trunc(y/z) >= 0
     768               thus the errors will (partly) compensate, giving a bound
     769               max(|eps1|,|eps2|).
     770               In addition, there is a 3rd error eps3 since y might be the
     771               conversion of only a part of the character string, and/or y
     772               might be truncated by the mpn_rshift call above:
     773                 eps3 = exact_y/b^e - y/b^e >= 0.
     774            */
     775            if (err == -2)
     776              goto underflow; /* FIXME: Sure? */
     777            else if (err == -1)
     778              err = 0; /* see the note below */
     779            else
     780              exact = 0;
     781  
     782            /* exp -= exp_z + ysize_bits with overflow checking
     783               and check that we can add/subtract 2 to exp without overflow */
     784            MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits,
     785                                mpfr_exp_t, mpfr_uexp_t,
     786                                MPFR_EXP_MIN, MPFR_EXP_MAX,
     787                                goto underflow, goto overflow);
     788            MPFR_SADD_OVERFLOW (exp, exp, -exp_z,
     789                                mpfr_exp_t, mpfr_uexp_t,
     790                                MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
     791                                goto overflow, goto underflow);
     792  
     793            /* Compute the integer division y/z rounded toward zero.
     794               The quotient will be put at result + ysize (size: ysize + 1),
     795               and the remainder at result (size: ysize).
     796               Both the dividend {y, 2*ysize} and the divisor {z, ysize} are
     797               normalized, i.e., the most significant bit of their most
     798               significant limb is 1. */
     799            MPFR_ASSERTD (MPFR_LIMB_MSB (y0[2 * ysize - 1]) != 0);
     800            MPFR_ASSERTD (MPFR_LIMB_MSB (z[ysize - 1]) != 0);
     801            mpn_tdiv_qr (result + ysize, result, (mp_size_t) 0, y0,
     802                         2 * ysize, z, ysize);
     803  
     804            /* The truncation error of the mpn_tdiv_qr call (eps2 above) is at
     805               most 1 ulp. Idem for the error eps3, which has the same sign,
     806               thus eps2 + eps3 <= 2 ulps.
     807               FIXME: For eps3, this is not obvious and should be explained.
     808               For the error eps1 coming from the approximation to b^e,
     809               we have (still up to a power-of-2 normalization):
     810               y/z - y/b^e = y * (b^e-z) / (z * b^e) <= y * 2^err / (z * b^e).
     811               We have to convert that error in terms of ulp(trunc(y/z)).
     812               We first have ulp(trunc(y/z)) = ulp(y/z).
     813  
     814               FIXME: There must be some discussion about the exponents,
     815                      because up to a power of 2, 1/2 <= |y/z| < 1 and
     816                      1 <= |y/z| < 2 are equivalent and give no information.
     817                      Moreover 1/2 <= b^e < 1 has not been explained and may
     818                      hide mistakes since one may have 1/2 <= z < 1 < b^e.
     819  
     820               Since both y and z are normalized, the quotient
     821               {result+ysize, ysize+1} has exactly ysize limbs, plus maybe one
     822               bit (this corresponds to the MPFR_ASSERTD below):
     823               * if the quotient has exactly ysize limbs, then 1/2 <= |y/z| < 1
     824                 (up to a power of 2) and since 1/2 <= b^e < 1, the error is at
     825                 most 2^(err+1) ulps;
     826               * if the quotient has one extra bit, then 1 <= |y/z| < 2
     827                 (up to a power of 2) and since 1/2 <= b^e < 1, the error is at
     828                 most 2^(err+2) ulps; but since we will shift the result right
     829                 below by one bit, the final error will be at most 2^(err+1) ulps
     830                 too.
     831  
     832               Thus the error is:
     833               * at most 2^(err+1) ulps for eps1
     834               * at most 2 ulps for eps2 + eps3, which is of opposite sign
     835               and we can bound the error by 2^(err+1) ulps in all cases.
     836  
     837               Note: If eps1 was 0, the error would be bounded by 2 ulps,
     838               thus replacing err = -1 by err = 0 above was the right thing
     839               to do, since 2^(0+1) = 2.
     840            */
     841            MPFR_ASSERTD (result[2 * ysize] <= 1);
     842  
     843            err += 1; /* see above for the explanation of the +1 term */
     844  
     845            /* if the remainder of the division is zero, then the result is
     846               still "exact" if it was before */
     847            exact = exact && (mpn_popcount (result, ysize) == 0);
     848  
     849            /* normalize result */
     850            if (result[2 * ysize] == MPFR_LIMB_ONE)
     851              {
     852                mp_limb_t *r = result + ysize;
     853  
     854                exact = exact && ((*r & MPFR_LIMB_ONE) == 0);
     855                mpn_rshift (r, r, ysize + 1, 1);
     856                /* Overflow Checking not needed */
     857                exp ++;
     858              }
     859            result += ysize;
     860          }
     861        /* case exp_base = pstr_size: no multiplication or division needed */
     862        else
     863          {
     864            MPFR_LOG_MSG (("case 4 (exp_base = pstr_size)\n", 0));
     865  
     866            /* base^(exp-pr) = 1             nothing to compute */
     867            result = y;
     868            err = 0;
     869          }
     870  
     871        MPFR_LOG_MSG (("exact = %d, err = %d, precx = %Pd\n",
     872                       exact, err, precx));
     873  
     874        /* at this point, result is an approximation rounded toward zero
     875           of the pstr_size most significant digits of pstr->mant, with
     876           equality in case exact is non-zero. */
     877  
     878        /* test if rounding is possible, and if so exit the loop.
     879           Note: we also need to be able to determine the correct ternary value,
     880           thus we use the precx + (rnd == MPFR_RNDN) trick.
     881           For example if result = xxx...xxx111...111 and rnd = RNDN,
     882           then we know the correct rounding is xxx...xx(x+1), but we cannot know
     883           the correct ternary value. */
     884        if (exact || mpfr_round_p (result, ysize, ysize_bits - err - 1,
     885                                   precx + (rnd == MPFR_RNDN)))
     886          break;
     887  
     888        /* update the prec for next loop */
     889        MPFR_ZIV_NEXT (loop, prec);
     890      } /* loop */
     891    MPFR_ZIV_FREE (loop);
     892  
     893    /* round y */
     894    if (mpfr_round_raw (MPFR_MANT (x), result, ysize_bits,
     895                        pstr->negative, precx, rnd, &res))
     896      {
     897        /* overflow when rounding y */
     898        MPFR_MANT (x)[MPFR_LIMB_SIZE (x) - 1] = MPFR_LIMB_HIGHBIT;
     899        /* Overflow Checking not needed */
     900        exp ++;
     901      }
     902  
     903    /* Note: if exact <> 0, then the approximation {result, ysize} is exact,
     904       thus no double-rounding can occur:
     905       (a) either the ternary value res is non-zero, and it is the correct
     906           ternary value that we should return
     907       (b) or the ternary value res is zero, and we should return 0. */
     908  
     909    /* Set sign of x before exp since check_range needs a valid sign */
     910    (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x);
     911  
     912    /* DO NOT USE MPFR_SET_EXP. The exp may be out of range! */
     913    MPFR_SADD_OVERFLOW (exp, exp, ysize_bits,
     914                        mpfr_exp_t, mpfr_uexp_t,
     915                        MPFR_EXP_MIN, MPFR_EXP_MAX,
     916                        goto overflow, goto underflow);
     917    MPFR_EXP (x) = exp;
     918    res = mpfr_check_range (x, res, rnd);
     919    goto end;
     920  
     921   underflow:
     922    /* This is called when there is a huge overflow
     923       (Real expo < MPFR_EXP_MIN << __gmpfr_emin */
     924    if (rnd == MPFR_RNDN)
     925      rnd = MPFR_RNDZ;
     926    res = mpfr_underflow (x, rnd, (pstr->negative) ? -1 : 1);
     927    goto end;
     928  
     929   overflow:
     930    res = mpfr_overflow (x, rnd, (pstr->negative) ? -1 : 1);
     931  
     932   end:
     933    MPFR_TMP_FREE (marker);
     934    return res;
     935  }
     936  
     937  static void
     938  free_parsed_string (struct parsed_string *pstr)
     939  {
     940    mpfr_free_func (pstr->mantissa, pstr->alloc);
     941  }
     942  
     943  int
     944  mpfr_strtofr (mpfr_ptr x, const char *string, char **end, int base,
     945                mpfr_rnd_t rnd)
     946  {
     947    int res;
     948    struct parsed_string pstr;
     949  
     950    /* For base <= 36, parsing is case-insensitive. */
     951    MPFR_ASSERTN (base == 0 || (base >= 2 && base <= 62));
     952  
     953    /* If an error occurred, it must return 0. */
     954    MPFR_SET_ZERO (x);
     955    MPFR_SET_POS (x);
     956  
     957    MPFR_STAT_STATIC_ASSERT (MPFR_MAX_BASE >= 62);
     958    res = parse_string (x, &pstr, &string, base);
     959    /* If res == 0, then it was exact (NAN or INF),
     960       so it is also the ternary value */
     961    if (MPFR_UNLIKELY (res == -1))  /* invalid data */
     962      res = 0;  /* x is set to 0, which is exact, thus ternary value is 0 */
     963    else if (res == 1)
     964      {
     965        res = parsed_string_to_mpfr (x, &pstr, rnd);
     966        free_parsed_string (&pstr);
     967      }
     968    else if (res == 2)
     969      res = mpfr_overflow (x, rnd, (pstr.negative) ? -1 : 1);
     970    MPFR_ASSERTD (res != 3);
     971  #if 0
     972    else if (res == 3)
     973      {
     974        /* This is called when there is a huge overflow
     975           (Real expo < MPFR_EXP_MIN << __gmpfr_emin */
     976        if (rnd == MPFR_RNDN)
     977          rnd = MPFR_RNDZ;
     978        res = mpfr_underflow (x, rnd, (pstr.negative) ? -1 : 1);
     979      }
     980  #endif
     981  
     982    if (end != NULL)
     983      *end = (char *) string;
     984    return res;
     985  }