1  /* mpfr_set_ld -- convert a machine long double to
       2                    a multiple precision floating-point number
       3  
       4  Copyright 2002-2023 Free Software Foundation, Inc.
       5  Contributed by the AriC and Caramba projects, INRIA.
       6  
       7  This file is part of the GNU MPFR Library.
       8  
       9  The GNU MPFR Library is free software; you can redistribute it and/or modify
      10  it under the terms of the GNU Lesser General Public License as published by
      11  the Free Software Foundation; either version 3 of the License, or (at your
      12  option) any later version.
      13  
      14  The GNU MPFR Library is distributed in the hope that it will be useful, but
      15  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      16  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      17  License for more details.
      18  
      19  You should have received a copy of the GNU Lesser General Public License
      20  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      21  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      22  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      23  
      24  #include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */
      25  
      26  #define MPFR_NEED_LONGLONG_H
      27  #include "mpfr-impl.h"
      28  
      29  /* To check for +inf, one can use the test x > LDBL_MAX, as LDBL_MAX is
      30     the maximum finite number representable in a long double, according
      31     to DR 467; see
      32       https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2092.htm
      33     If this fails on some platform, a test x - x != 0 might be used. */
      34  
      35  #if defined(HAVE_LDOUBLE_IS_DOUBLE)
      36  
      37  /* the "long double" format is the same as "double" */
      38  int
      39  mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
      40  {
      41    return mpfr_set_d (r, (double) d, rnd_mode);
      42  }
      43  
      44  #elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE)
      45  
      46  #if GMP_NUMB_BITS >= 64
      47  # define MPFR_LIMBS_PER_LONG_DOUBLE 1
      48  #elif GMP_NUMB_BITS == 32
      49  # define MPFR_LIMBS_PER_LONG_DOUBLE 2
      50  #elif GMP_NUMB_BITS == 16
      51  # define MPFR_LIMBS_PER_LONG_DOUBLE 4
      52  #elif GMP_NUMB_BITS == 8
      53  # define MPFR_LIMBS_PER_LONG_DOUBLE 8
      54  #else
      55  #error "GMP_NUMB_BITS is assumed to be 8, 16, 32 or >= 64"
      56  #endif
      57  /* The hypothetical GMP_NUMB_BITS == 16 is not supported. It will trigger
      58     an error below. */
      59  
      60  /* IEEE Extended Little Endian Code */
      61  int
      62  mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
      63  {
      64    int inexact, k, cnt;
      65    mpfr_t tmp;
      66    mp_limb_t tmpmant[MPFR_LIMBS_PER_LONG_DOUBLE];
      67    mpfr_long_double_t x;
      68    mpfr_exp_t exp;
      69    int signd;
      70    MPFR_SAVE_EXPO_DECL (expo);
      71  
      72    /* Check for NAN */
      73    if (MPFR_UNLIKELY (DOUBLE_ISNAN (d)))
      74      {
      75        /* we don't propagate the sign bit */
      76        MPFR_SET_NAN (r);
      77        MPFR_RET_NAN;
      78      }
      79    /* Check for INF */
      80    else if (MPFR_UNLIKELY (d > LDBL_MAX))
      81      {
      82        MPFR_SET_INF (r);
      83        MPFR_SET_POS (r);
      84        return 0;
      85      }
      86    else if (MPFR_UNLIKELY (d < -LDBL_MAX))
      87      {
      88        MPFR_SET_INF (r);
      89        MPFR_SET_NEG (r);
      90        return 0;
      91      }
      92    /* Check for ZERO */
      93    else if (MPFR_UNLIKELY (d == 0.0))
      94      {
      95        x.ld = d;
      96        MPFR_SET_ZERO (r);
      97        if (x.s.sign == 1)
      98          MPFR_SET_NEG(r);
      99        else
     100          MPFR_SET_POS(r);
     101        return 0;
     102      }
     103  
     104    /* now d is neither 0, nor NaN nor Inf */
     105    MPFR_SAVE_EXPO_MARK (expo);
     106  
     107    MPFR_MANT (tmp) = tmpmant;
     108    MPFR_PREC (tmp) = 64;
     109  
     110    /* Extract sign */
     111    x.ld = d;
     112    signd = MPFR_SIGN_POS;
     113    if (x.ld < 0.0)
     114      {
     115        signd = MPFR_SIGN_NEG;
     116        x.ld = -x.ld;
     117      }
     118  
     119    /* Extract and normalize the significand */
     120  #if MPFR_LIMBS_PER_LONG_DOUBLE == 1
     121    tmpmant[0] = ((mp_limb_t) x.s.manh << 32) | ((mp_limb_t) x.s.manl);
     122    count_leading_zeros (cnt, tmpmant[0]);
     123    tmpmant[0] <<= cnt;
     124    k = 0; /* number of limbs shifted */
     125  #else /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
     126  #if MPFR_LIMBS_PER_LONG_DOUBLE == 2
     127    tmpmant[0] = (mp_limb_t) x.s.manl;
     128    tmpmant[1] = (mp_limb_t) x.s.manh;
     129  #elif MPFR_LIMBS_PER_LONG_DOUBLE == 4
     130    tmpmant[0] = (mp_limb_t) x.s.manl;
     131    tmpmant[1] = (mp_limb_t) (x.s.manl >> 16);
     132    tmpmant[2] = (mp_limb_t) x.s.manh;
     133    tmpmant[3] = (mp_limb_t) (x.s.manh >> 16);
     134  #elif MPFR_LIMBS_PER_LONG_DOUBLE == 8
     135    tmpmant[0] = (mp_limb_t) x.s.manl;
     136    tmpmant[1] = (mp_limb_t) (x.s.manl >> 8);
     137    tmpmant[2] = (mp_limb_t) (x.s.manl >> 16);
     138    tmpmant[3] = (mp_limb_t) (x.s.manl >> 24);
     139    tmpmant[4] = (mp_limb_t) x.s.manh;
     140    tmpmant[5] = (mp_limb_t) (x.s.manh >> 8);
     141    tmpmant[6] = (mp_limb_t) (x.s.manh >> 16);
     142    tmpmant[7] = (mp_limb_t) (x.s.manh >> 24);
     143  #else
     144  #error "MPFR_LIMBS_PER_LONG_DOUBLE should be 1, 2, 4 or 8"
     145  #endif /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
     146    {
     147      int i = MPFR_LIMBS_PER_LONG_DOUBLE;
     148      MPN_NORMALIZE_NOT_ZERO (tmpmant, i);
     149      k = MPFR_LIMBS_PER_LONG_DOUBLE - i;
     150      count_leading_zeros (cnt, tmpmant[i - 1]);
     151      if (cnt != 0)
     152        mpn_lshift (tmpmant + k, tmpmant, i, cnt);
     153      else if (k != 0)
     154        /* since we copy {tmpmant, i} into {tmpmant + k, i}, we should work
     155           decreasingly, thus call mpn_copyd */
     156        mpn_copyd (tmpmant + k, tmpmant, i);
     157      if (k != 0)
     158        MPN_ZERO (tmpmant, k);
     159    }
     160  #endif /* MPFR_LIMBS_PER_LONG_DOUBLE == 1 */
     161  
     162    /* Set exponent */
     163    exp = (mpfr_exp_t) ((x.s.exph << 8) + x.s.expl);  /* 15-bit unsigned int */
     164    if (MPFR_UNLIKELY (exp == 0))
     165      exp -= 0x3FFD;
     166    else
     167      exp -= 0x3FFE;
     168  
     169    MPFR_SET_EXP (tmp, exp - cnt - k * GMP_NUMB_BITS);
     170  
     171    /* tmp is exact */
     172    inexact = mpfr_set4 (r, tmp, rnd_mode, signd);
     173  
     174    MPFR_SAVE_EXPO_FREE (expo);
     175    return mpfr_check_range (r, inexact, rnd_mode);
     176  }
     177  
     178  #elif defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE)
     179  
     180  /* double-double code, see
     181     https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/config/rs6000/ibm-ldouble-format;h=e8ada17f7696cd942e710d5b67d4149f5fcccf45;hb=HEAD */
     182  int
     183  mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
     184  {
     185    mpfr_t t, u;
     186    int inexact;
     187    double h, l;
     188    MPFR_SAVE_EXPO_DECL (expo);
     189  
     190    /* Check for NAN. Since we can't use isnan(), we rely on the
     191       LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
     192    LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });
     193  
     194    /* Check for INF */
     195    if (d > LDBL_MAX)
     196      {
     197        mpfr_set_inf (r, 1);
     198        return 0;
     199      }
     200    else if (d < -LDBL_MAX)
     201      {
     202        mpfr_set_inf (r, -1);
     203        return 0;
     204      }
     205    /* Check for ZERO */
     206    else if (d == 0.0)
     207      return mpfr_set_d (r, (double) d, rnd_mode);
     208  
     209    if (d >= LDBL_MAX || d <= -LDBL_MAX)
     210      h = (d >= LDBL_MAX) ? LDBL_MAX : -LDBL_MAX;
     211    else
     212      h = (double) d; /* should not overflow */
     213    l = (double) (d - (long double) h);
     214  
     215    MPFR_SAVE_EXPO_MARK (expo);
     216  
     217    mpfr_init2 (t, IEEE_DBL_MANT_DIG);
     218    mpfr_init2 (u, IEEE_DBL_MANT_DIG);
     219  
     220    inexact = mpfr_set_d (t, h, MPFR_RNDN);
     221    MPFR_ASSERTN(inexact == 0);
     222    inexact = mpfr_set_d (u, l, MPFR_RNDN);
     223    MPFR_ASSERTN(inexact == 0);
     224    inexact = mpfr_add (r, t, u, rnd_mode);
     225  
     226    mpfr_clear (t);
     227    mpfr_clear (u);
     228  
     229    MPFR_SAVE_EXPO_FREE (expo);
     230    inexact = mpfr_check_range (r, inexact, rnd_mode);
     231    return inexact;
     232  }
     233  
     234  #else
     235  
     236  /* Generic code */
     237  int
     238  mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
     239  {
     240    mpfr_t t, u;
     241    int inexact, shift_exp;
     242    long double x;
     243    MPFR_SAVE_EXPO_DECL (expo);
     244  
     245    /* Check for NAN. Since we can't use isnan(), we rely on the
     246       LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
     247    LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });
     248  
     249    /* Check for INF */
     250    if (d > LDBL_MAX)
     251      {
     252        mpfr_set_inf (r, 1);
     253        return 0;
     254      }
     255    else if (d < -LDBL_MAX)
     256      {
     257        mpfr_set_inf (r, -1);
     258        return 0;
     259      }
     260    /* Check for ZERO */
     261    else if (d == 0.0)
     262      return mpfr_set_d (r, (double) d, rnd_mode);
     263  
     264    mpfr_init2 (t, MPFR_LDBL_MANT_DIG);
     265    mpfr_init2 (u, IEEE_DBL_MANT_DIG);
     266  
     267    MPFR_SAVE_EXPO_MARK (expo);
     268  
     269   convert:
     270    x = d;
     271    MPFR_SET_ZERO (t);  /* The sign doesn't matter. */
     272    shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */
     273    while (x != (long double) 0.0)
     274      {
     275        /* Check overflow of double */
     276        if (x > (long double) DBL_MAX || (-x) > (long double) DBL_MAX)
     277          {
     278            long double div9, div10, div11, div12, div13;
     279  
     280  #define TWO_64 18446744073709551616.0 /* 2^64 */
     281  #define TWO_128 (TWO_64 * TWO_64)
     282  #define TWO_256 (TWO_128 * TWO_128)
     283            div9 = (long double) (double) (TWO_256 * TWO_256); /* 2^(2^9) */
     284            div10 = div9 * div9;
     285            div11 = div10 * div10; /* 2^(2^11) */
     286            div12 = div11 * div11; /* 2^(2^12) */
     287            div13 = div12 * div12; /* 2^(2^13) */
     288            if (ABS (x) >= div13)
     289              {
     290                x /= div13; /* exact */
     291                shift_exp += 8192;
     292                mpfr_div_2si (t, t, 8192, MPFR_RNDZ);
     293              }
     294            if (ABS (x) >= div12)
     295              {
     296                x /= div12; /* exact */
     297                shift_exp += 4096;
     298                mpfr_div_2si (t, t, 4096, MPFR_RNDZ);
     299              }
     300            if (ABS (x) >= div11)
     301              {
     302                x /= div11; /* exact */
     303                shift_exp += 2048;
     304                mpfr_div_2si (t, t, 2048, MPFR_RNDZ);
     305              }
     306            if (ABS (x) >= div10)
     307              {
     308                x /= div10; /* exact */
     309                shift_exp += 1024;
     310                mpfr_div_2si (t, t, 1024, MPFR_RNDZ);
     311              }
     312            /* warning: we may have DBL_MAX=2^1024*(1-2^(-53)) < x < 2^1024,
     313               therefore we have one extra exponent reduction step */
     314            if (ABS (x) >= div9)
     315              {
     316                x /= div9; /* exact */
     317                shift_exp += 512;
     318                mpfr_div_2si (t, t, 512, MPFR_RNDZ);
     319              }
     320          } /* Check overflow of double */
     321        else /* no overflow on double */
     322          {
     323            long double div9, div10, div11;
     324  
     325            div9 = (long double) (double) 7.4583407312002067432909653e-155;
     326            /* div9 = 2^(-2^9) */
     327            div10 = div9  * div9;  /* 2^(-2^10) */
     328            div11 = div10 * div10; /* 2^(-2^11) if extended precision */
     329            /* since -DBL_MAX <= x <= DBL_MAX, the cast to double should not
     330               overflow here */
     331            if (ABS(x) < div10 &&
     332                div11 != (long double) 0.0 &&
     333                div11 / div10 == div10) /* possible underflow */
     334              {
     335                long double div12, div13;
     336                /* After the divisions, any bit of x must be >= div10,
     337                   hence the possible division by div9. */
     338                div12 = div11 * div11; /* 2^(-2^12) */
     339                div13 = div12 * div12; /* 2^(-2^13) */
     340                if (ABS (x) <= div13)
     341                  {
     342                    x /= div13; /* exact */
     343                    shift_exp -= 8192;
     344                    mpfr_mul_2si (t, t, 8192, MPFR_RNDZ);
     345                  }
     346                if (ABS (x) <= div12)
     347                  {
     348                    x /= div12; /* exact */
     349                    shift_exp -= 4096;
     350                    mpfr_mul_2si (t, t, 4096, MPFR_RNDZ);
     351                  }
     352                if (ABS (x) <= div11)
     353                  {
     354                    x /= div11; /* exact */
     355                    shift_exp -= 2048;
     356                    mpfr_mul_2si (t, t, 2048, MPFR_RNDZ);
     357                  }
     358                if (ABS (x) <= div10)
     359                  {
     360                    x /= div10; /* exact */
     361                    shift_exp -= 1024;
     362                    mpfr_mul_2si (t, t, 1024, MPFR_RNDZ);
     363                  }
     364                if (ABS(x) <= div9)
     365                  {
     366                    x /= div9;  /* exact */
     367                    shift_exp -= 512;
     368                    mpfr_mul_2si (t, t, 512, MPFR_RNDZ);
     369                  }
     370              }
     371            else /* no underflow */
     372              {
     373                inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ);
     374                MPFR_ASSERTD (inexact == 0);
     375                if (mpfr_add (t, t, u, MPFR_RNDZ) != 0)
     376                  {
     377                    if (!mpfr_number_p (t))
     378                      break;
     379                    /* Inexact. This cannot happen unless the C implementation
     380                       "lies" on the precision or when long doubles are
     381                       implemented with FP expansions like double-double on
     382                       PowerPC. */
     383                    if (MPFR_PREC (t) != MPFR_PREC (r) + 1)
     384                      {
     385                        /* We assume that MPFR_PREC (r) < MPFR_PREC_MAX.
     386                           The precision MPFR_PREC (r) + 1 allows us to
     387                           deduce the rounding bit and the sticky bit. */
     388                        mpfr_set_prec (t, MPFR_PREC (r) + 1);
     389                        goto convert;
     390                      }
     391                    else
     392                      {
     393                        mp_limb_t *tp;
     394                        int rb_mask;
     395  
     396                        /* Since mpfr_add was inexact, the sticky bit is 1. */
     397                        tp = MPFR_MANT (t);
     398                        rb_mask = MPFR_LIMB_ONE <<
     399                          (GMP_NUMB_BITS - 1 -
     400                           (MPFR_PREC (r) & (GMP_NUMB_BITS - 1)));
     401                        if (rnd_mode == MPFR_RNDN)
     402                          rnd_mode = (*tp & rb_mask) ^ MPFR_IS_NEG (t) ?
     403                            MPFR_RNDU : MPFR_RNDD;
     404                        *tp |= rb_mask;
     405                        break;
     406                      }
     407                  }
     408                x -= (long double) mpfr_get_d1 (u); /* exact */
     409              }
     410          }
     411      }
     412    inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode);
     413    mpfr_clear (t);
     414    mpfr_clear (u);
     415  
     416    MPFR_SAVE_EXPO_FREE (expo);
     417    return mpfr_check_range (r, inexact, rnd_mode);
     418  }
     419  
     420  #endif