1  /* mpfr_set_d -- convert a machine double precision float to
       2                   a multiple precision floating-point number
       3  
       4  Copyright 1999-2004, 2006-2023 Free Software Foundation, Inc.
       5  Contributed by the AriC and Caramba projects, INRIA.
       6  
       7  This file is part of the GNU MPFR Library.
       8  
       9  The GNU MPFR Library is free software; you can redistribute it and/or modify
      10  it under the terms of the GNU Lesser General Public License as published by
      11  the Free Software Foundation; either version 3 of the License, or (at your
      12  option) any later version.
      13  
      14  The GNU MPFR Library is distributed in the hope that it will be useful, but
      15  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      16  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      17  License for more details.
      18  
      19  You should have received a copy of the GNU Lesser General Public License
      20  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      21  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      22  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      23  
      24  #include <float.h>  /* For DOUBLE_ISINF and DOUBLE_ISNAN */
      25  
      26  #define MPFR_NEED_LONGLONG_H
      27  #include "mpfr-impl.h"
      28  
      29  /* Extracts the bits of |d| in rp[0..n-1] where n=ceil(53/GMP_NUMB_BITS).
      30     Assumes d finite and <> 0.
      31     Returns the corresponding exponent such that |d| = {rp, n} * 2^exp,
      32     with the value of {rp, n} in [1/2, 1).
      33     The int type should be sufficient for exp.
      34  */
      35  static int
      36  extract_double (mpfr_limb_ptr rp, double d)
      37  {
      38    int exp;
      39    mp_limb_t man[MPFR_LIMBS_PER_DOUBLE];
      40  
      41    /* FIXME: Generalize to handle GMP_NUMB_BITS < 16. */
      42  
      43    MPFR_ASSERTD(!DOUBLE_ISNAN(d));
      44    MPFR_ASSERTD(!DOUBLE_ISINF(d));
      45    MPFR_ASSERTD(d != 0.0);
      46  
      47  #if _MPFR_IEEE_FLOATS
      48  
      49    {
      50      union mpfr_ieee_double_extract x;
      51      x.d = d;
      52  
      53      exp = x.s.exp;
      54      if (exp)
      55        {
      56          /* x.s.manh has 20 bits (in its low bits), x.s.manl has 32 bits */
      57  #if GMP_NUMB_BITS >= 64
      58          man[0] = ((MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)) |
      59                    ((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
      60                    ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
      61  #elif GMP_NUMB_BITS == 32
      62          man[1] = (MPFR_LIMB_ONE << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
      63          man[0] = x.s.manl << 11;
      64  #elif GMP_NUMB_BITS == 16
      65          MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
      66          man[3] = (MPFR_LIMB_ONE << 15) | (x.s.manh >> 5);
      67          man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
      68          man[1] = x.s.manl >> 5;
      69          man[0] = MPFR_LIMB_LSHIFT(x.s.manl,11);
      70  #else
      71          MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
      72          man[6] = (MPFR_LIMB_ONE << 7) | (x.s.manh >> 13);
      73          man[5] = (mp_limb_t) (x.s.manh >> 5);
      74          man[4] = MPFR_LIMB_LSHIFT(x.s.manh, 3) | (mp_limb_t) (x.s.manl >> 29);
      75          man[3] = (mp_limb_t) (x.s.manl >> 21);
      76          man[2] = (mp_limb_t) (x.s.manl >> 13);
      77          man[1] = (mp_limb_t) (x.s.manl >> 5);
      78          man[0] = MPFR_LIMB_LSHIFT(x.s.manl,3);
      79  #endif
      80          exp -= 1022;
      81        }
      82      else /* subnormal number */
      83        {
      84          int cnt;
      85          exp = -1021;
      86  #if GMP_NUMB_BITS >= 64
      87          man[0] = (((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
      88                    ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
      89          count_leading_zeros (cnt, man[0]);
      90  #elif GMP_NUMB_BITS == 32
      91          man[1] = (x.s.manh << 11) /* high 21 bits */
      92            | (x.s.manl >> 21); /* middle 11 bits */
      93          man[0] = x.s.manl << 11; /* low 21 bits */
      94          if (man[1] == 0)
      95            {
      96              man[1] = man[0];
      97              man[0] = 0;
      98              exp -= GMP_NUMB_BITS;
      99            }
     100          count_leading_zeros (cnt, man[1]);
     101          man[1] = (man[1] << cnt) |
     102            (cnt != 0 ? man[0] >> (GMP_NUMB_BITS - cnt) : 0);
     103  #elif GMP_NUMB_BITS == 16
     104          man[3] = x.s.manh >> 5;
     105          man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
     106          man[1] = x.s.manl >> 5;
     107          man[0] = x.s.manl << 11;
     108          while (man[3] == 0) /* d is assumed <> 0 */
     109            {
     110              man[3] = man[2];
     111              man[2] = man[1];
     112              man[1] = man[0];
     113              man[0] = 0;
     114              exp -= GMP_NUMB_BITS;
     115            }
     116          count_leading_zeros (cnt, man[3]);
     117          if (cnt)
     118            {
     119              man[3] = (man[3] << cnt) | (man[2] >> (GMP_NUMB_BITS - cnt));
     120              man[2] = (man[2] << cnt) | (man[1] >> (GMP_NUMB_BITS - cnt));
     121              man[1] = (man[1] << cnt) | (man[0] >> (GMP_NUMB_BITS - cnt));
     122            }
     123  #else
     124          MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
     125          man[6] = x.s.manh >> 13;
     126          man[5] = x.s.manh >> 5;
     127          man[4] = (x.s.manh << 3) | (x.s.manl >> 29);
     128          man[3] = x.s.manl >> 21;
     129          man[2] = x.s.manl >> 13;
     130          man[1] = x.s.manl >> 5;
     131          man[0] = x.s.manl << 3;
     132          while (man[6] == 0) /* d is assumed <> 0 */
     133            {
     134              man[6] = man[5];
     135              man[5] = man[4];
     136              man[4] = man[3];
     137              man[3] = man[2];
     138              man[2] = man[1];
     139              man[1] = man[0];
     140              man[0] = 0;
     141              exp -= GMP_NUMB_BITS;
     142            }
     143          count_leading_zeros (cnt, man[6]);
     144          if (cnt)
     145            {
     146              int i;
     147              for (i = 6; i > 0; i--)
     148                man[i] = (man[i] << cnt) | (man[i-1] >> (GMP_NUMB_BITS - cnt));
     149            }
     150  #endif
     151          man[0] <<= cnt;
     152          exp -= cnt;
     153        }
     154    }
     155  
     156  #else /* _MPFR_IEEE_FLOATS */
     157  
     158    {
     159      /* Unknown (or known to be non-IEEE) double format.  */
     160      exp = 0;
     161      d = ABS (d);
     162      if (d >= 1.0)
     163        {
     164          while (d >= 32768.0)
     165            {
     166              d *= (1.0 / 65536.0);
     167              exp += 16;
     168            }
     169          while (d >= 1.0)
     170            {
     171              d *= 0.5;
     172              exp += 1;
     173            }
     174        }
     175      else if (d < 0.5)
     176        {
     177          while (d < (1.0 / 65536.0))
     178            {
     179              d *=  65536.0;
     180              exp -= 16;
     181            }
     182          while (d < 0.5)
     183            {
     184              d *= 2.0;
     185              exp -= 1;
     186            }
     187        }
     188  
     189      d *= MP_BASE_AS_DOUBLE;
     190  #if GMP_NUMB_BITS >= 64
     191  #ifndef __clang__
     192      man[0] = d;
     193  #else
     194      /* clang up to version 11 produces an invalid exception when d >= 2^63,
     195         see <https://github.com/llvm/llvm-project/issues/18060>
     196         (old URL: <https://bugs.llvm.org/show_bug.cgi?id=17686>).
     197         Since this is always the case, here, we use the following patch. */
     198      MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
     199      man[0] = 0x8000000000000000 + (mp_limb_t) (d - 0x8000000000000000);
     200  #endif /* __clang__ */
     201  #elif GMP_NUMB_BITS == 32
     202      man[1] = (mp_limb_t) d;
     203      man[0] = (mp_limb_t) ((d - man[1]) * MP_BASE_AS_DOUBLE);
     204  #else
     205      MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
     206      {
     207        double r = d;
     208        man[3] = (mp_limb_t) r;
     209        r = (r - man[3]) * MP_BASE_AS_DOUBLE;
     210        man[2] = (mp_limb_t) r;
     211        r = (r - man[2]) * MP_BASE_AS_DOUBLE;
     212        man[1] = (mp_limb_t) r;
     213        r = (r - man[1]) * MP_BASE_AS_DOUBLE;
     214        man[0] = (mp_limb_t) r;
     215      }
     216  #endif
     217    }
     218  
     219  #endif /* _MPFR_IEEE_FLOATS */
     220  
     221    rp[0] = man[0];
     222  #if GMP_NUMB_BITS <= 32
     223    rp[1] = man[1];
     224  #endif
     225  #if GMP_NUMB_BITS <= 16
     226    rp[2] = man[2];
     227    rp[3] = man[3];
     228  #endif
     229  #if GMP_NUMB_BITS <= 8
     230    rp[4] = man[4];
     231    rp[5] = man[5];
     232    rp[6] = man[6];
     233  #endif
     234  
     235    MPFR_ASSERTD((rp[MPFR_LIMBS_PER_DOUBLE - 1] & MPFR_LIMB_HIGHBIT) != 0);
     236  
     237    return exp;
     238  }
     239  
     240  /* End of part included from gmp-2.0.2 */
     241  
     242  int
     243  mpfr_set_d (mpfr_ptr r, double d, mpfr_rnd_t rnd_mode)
     244  {
     245    int inexact;
     246    mpfr_t tmp;
     247    mp_limb_t tmpmant[MPFR_LIMBS_PER_DOUBLE];
     248    MPFR_SAVE_EXPO_DECL (expo);
     249  
     250    if (MPFR_UNLIKELY(DOUBLE_ISNAN(d)))
     251      {
     252        /* we don't propagate the sign bit */
     253        MPFR_SET_NAN(r);
     254        MPFR_RET_NAN;
     255      }
     256    else if (MPFR_UNLIKELY(d == 0))
     257      {
     258  #if _MPFR_IEEE_FLOATS
     259        union mpfr_ieee_double_extract x;
     260  
     261        MPFR_SET_ZERO(r);
     262        /* set correct sign */
     263        x.d = d;
     264        if (x.s.sig == 1)
     265          MPFR_SET_NEG(r);
     266        else
     267          MPFR_SET_POS(r);
     268  #else /* _MPFR_IEEE_FLOATS */
     269        MPFR_SET_ZERO(r);
     270        {
     271          /* This is to get the sign of zero on non-IEEE hardware
     272             Some systems support +0.0, -0.0, and unsigned zero.
     273             Some other systems may just have an unsigned zero.
     274             We can't use d == +0.0 since it should be always true,
     275             so we check that the memory representation of d is the
     276             same as +0.0, etc.
     277             Note: r is set to -0 only if d is detected as a negative zero
     278             *and*, for the double type, -0 has a different representation
     279             from +0. If -0.0 has several representations, the code below
     280             may not work as expected, but this is hardly fixable in a
     281             portable way (without depending on a math library) and only
     282             the sign could be incorrect. Such systems should be taken
     283             into account on a case-by-case basis. If the code is changed
     284             here, set_d64.c code should be updated too. */
     285          double poszero = +0.0, negzero = DBL_NEG_ZERO;
     286          if (memcmp(&d, &poszero, sizeof(double)) == 0)
     287            MPFR_SET_POS(r);
     288          else if (memcmp(&d, &negzero, sizeof(double)) == 0)
     289            MPFR_SET_NEG(r);
     290          else
     291            MPFR_SET_POS(r);
     292        }
     293  #endif /* _MPFR_IEEE_FLOATS */
     294        return 0; /* 0 is exact */
     295      }
     296    else if (MPFR_UNLIKELY(DOUBLE_ISINF(d)))
     297      {
     298        MPFR_SET_INF(r);
     299        if (d > 0)
     300          MPFR_SET_POS(r);
     301        else
     302          MPFR_SET_NEG(r);
     303        return 0; /* infinity is exact */
     304      }
     305  
     306    /* now d is neither 0, nor NaN nor Inf */
     307  
     308    MPFR_SAVE_EXPO_MARK (expo);
     309  
     310    /* warning: don't use tmp=r here, even if SIZE(r) >= MPFR_LIMBS_PER_DOUBLE,
     311       since PREC(r) may be different from PREC(tmp), and then both variables
     312       would have same precision in the mpfr_set4 call below. */
     313    MPFR_MANT(tmp) = tmpmant;
     314    MPFR_PREC(tmp) = IEEE_DBL_MANT_DIG;
     315  
     316    /* don't use MPFR_SET_EXP here since the exponent may be out of range */
     317    MPFR_EXP(tmp) = extract_double (tmpmant, d);
     318  
     319    /* tmp is exact since PREC(tmp)=53 */
     320    inexact = mpfr_set4 (r, tmp, rnd_mode,
     321                         (d < 0) ? MPFR_SIGN_NEG : MPFR_SIGN_POS);
     322  
     323    MPFR_SAVE_EXPO_FREE (expo);
     324    return mpfr_check_range (r, inexact, rnd_mode);
     325  }
     326  
     327  
     328