mpfr-4.2.1/src/mul.c
       1  /* mpfr_mul -- multiply two floating-point numbers
       2  
       3  Copyright 1999-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  
      27  /********* BEGINNING CHECK *************/
      28  
      29  /* Check if we have to check the result of mpfr_mul.
       30     TODO: Find a better (and faster?) check than using the old implementation */
      31  #if MPFR_WANT_ASSERT >= 2
      32  
      33  int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode);
      34  static int
      35  mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
      36  {
      37    /* Old implementation */
      38    int sign_product, cc, inexact;
      39    mpfr_exp_t ax;
      40    mp_limb_t *tmp;
      41    mp_limb_t b1;
      42    mpfr_prec_t bq, cq;
      43    mp_size_t bn, cn, tn, k;
      44    MPFR_TMP_DECL(marker);
      45  
      46    /* deal with special cases */
      47    if (MPFR_ARE_SINGULAR(b,c))
      48      {
      49        if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
      50          {
      51            MPFR_SET_NAN(a);
      52            MPFR_RET_NAN;
      53          }
      54        sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
      55        if (MPFR_IS_INF(b))
      56          {
      57            if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
      58              {
      59                MPFR_SET_SIGN(a, sign_product);
      60                MPFR_SET_INF(a);
      61                MPFR_RET(0); /* exact */
      62              }
      63            else
      64              {
      65                MPFR_SET_NAN(a);
      66                MPFR_RET_NAN;
      67              }
      68          }
      69        else if (MPFR_IS_INF(c))
      70          {
      71            if (MPFR_NOTZERO(b))
      72              {
      73                MPFR_SET_SIGN(a, sign_product);
      74                MPFR_SET_INF(a);
      75                MPFR_RET(0); /* exact */
      76              }
      77            else
      78              {
      79                MPFR_SET_NAN(a);
      80                MPFR_RET_NAN;
      81              }
      82          }
      83        else
      84          {
      85            MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
      86            MPFR_SET_SIGN(a, sign_product);
      87            MPFR_SET_ZERO(a);
      88            MPFR_RET(0); /* 0 * 0 is exact */
      89          }
      90      }
      91    sign_product = MPFR_MULT_SIGN(MPFR_SIGN(b), MPFR_SIGN(c));
      92  
      93    ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
      94  
      95    bq = MPFR_PREC (b);
      96    cq = MPFR_PREC (c);
      97  
      98    MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
      99  
     100    bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
     101    cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
      102    k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
     103    tn = MPFR_PREC2LIMBS (bq + cq);
     104    /* <= k, thus no int overflow */
     105    MPFR_ASSERTD(tn <= k);
     106  
      107    /* Check for no size_t overflow. */
     108    MPFR_ASSERTD((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
     109    MPFR_TMP_MARK(marker);
     110    tmp = MPFR_TMP_LIMBS_ALLOC (k);
     111  
      112    /* multiply the two mantissas in temporarily allocated space */
     113    b1 = (MPFR_LIKELY(bn >= cn)) ?
     114      mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
     115      : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);
     116  
      117    /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
     118       with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
     119    b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
     120    MPFR_ASSERTD (b1 == 0 || b1 == 1);
     121  
      122    /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
      123       then their product is in (1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
      124       and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
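             /* (Where 2*ln(2)-1 comes from: for X and Y independent and uniform
                on (1/2, 1], each with density 2, we have P(Y <= 1/(2x)) = 1/x - 1
                for x in (1/2, 1], hence
                  P(X*Y <= 1/2) = int_{1/2}^{1} 2*(1/x - 1) dx = 2*ln(2) - 1.) */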
     125    tmp += k - tn;
     126    if (MPFR_UNLIKELY(b1 == 0))
     127      mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
     128    cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
     129                         MPFR_IS_NEG_SIGN(sign_product),
     130                         MPFR_PREC (a), rnd_mode, &inexact);
     131    MPFR_ASSERTD (cc == 0 || cc == 1);
     132  
     133    /* cc = 1 ==> result is a power of two */
     134    if (MPFR_UNLIKELY(cc))
     135      MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;
     136  
     137    MPFR_TMP_FREE(marker);
     138  
     139    {
     140      /* We need to cast b1 to a signed integer type in order to use
     141         signed integer arithmetic only, as the expression can involve
     142         negative integers. Let's recall that both b1 and cc are 0 or 1,
     143         and since cc is an int, let's choose int for this part. */
     144      mpfr_exp_t ax2 = ax + ((int) b1 - 1 + cc);
     145      if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
     146        return mpfr_overflow (a, rnd_mode, sign_product);
     147      if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
     148        {
     149          /* In the rounding to the nearest mode, if the exponent of the exact
     150             result (i.e. before rounding, i.e. without taking cc into account)
     151             is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
     152             both arguments are powers of 2) in absolute value, then round to
     153             zero. */
     154          if (rnd_mode == MPFR_RNDN &&
     155              (ax + (mpfr_exp_t) b1 < __gmpfr_emin ||
     156               (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
     157            rnd_mode = MPFR_RNDZ;
     158          return mpfr_underflow (a, rnd_mode, sign_product);
     159        }
     160      MPFR_SET_EXP (a, ax2);
     161      MPFR_SET_SIGN(a, sign_product);
     162    }
     163    MPFR_RET (inexact);
     164  }
     165  
     166  int
     167  mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
     168  {
     169    mpfr_t ta, tb, tc;
     170    mpfr_flags_t old_flags, flags1, flags2;
     171    int inexact1, inexact2;
     172  
     173    if (rnd_mode == MPFR_RNDF)
     174      return mpfr_mul2 (a, b, c, rnd_mode);
     175  
     176    old_flags = __gmpfr_flags;
     177  
     178    mpfr_init2 (ta, MPFR_PREC (a));
     179    mpfr_init2 (tb, MPFR_PREC (b));
     180    mpfr_init2 (tc, MPFR_PREC (c));
     181    MPFR_ASSERTN (mpfr_set (tb, b, MPFR_RNDN) == 0);
     182    MPFR_ASSERTN (mpfr_set (tc, c, MPFR_RNDN) == 0);
     183  
     184    /* Note: If b or c is NaN, then the NaN flag has been set by mpfr_set above.
     185       Thus restore the old flags just below to make sure that mpfr_mul3 is
     186       tested under the real conditions. */
     187  
     188    __gmpfr_flags = old_flags;
     189    inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
     190    flags2 = __gmpfr_flags;
     191  
     192    __gmpfr_flags = old_flags;
     193    inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
     194    flags1 = __gmpfr_flags;
     195  
     196    /* Convert the ternary values to (-1,0,1). */
     197    inexact2 = VSIGN (inexact2);
     198    inexact1 = VSIGN (inexact1);
     199  
     200    if (! ((MPFR_IS_NAN (ta) && MPFR_IS_NAN (a)) || mpfr_equal_p (ta, a)) ||
     201        inexact1 != inexact2 || flags1 != flags2)
     202      {
     203        /* We do not have MPFR_PREC_FSPEC, so let's use mpfr_eexp_t and
     204           MPFR_EXP_FSPEC since mpfr_prec_t values are guaranteed to be
     205           representable in mpfr_exp_t, thus in mpfr_eexp_t. */
      206        fprintf (stderr, "mpfr_mul returns different values for %s\n"
     207                 "Prec_a = %" MPFR_EXP_FSPEC "d, "
     208                 "Prec_b = %" MPFR_EXP_FSPEC "d, "
     209                 "Prec_c = %" MPFR_EXP_FSPEC "d\n",
     210                 mpfr_print_rnd_mode (rnd_mode),
     211                 (mpfr_eexp_t) MPFR_PREC (a),
     212                 (mpfr_eexp_t) MPFR_PREC (b),
     213                 (mpfr_eexp_t) MPFR_PREC (c));
     214        /* Note: We output tb and tc instead of b and c, in case a = b or c
     215           (this is why tb and tc have been created in the first place). */
     216        fprintf (stderr, "b = ");
     217        mpfr_fdump (stderr, tb);
     218        fprintf (stderr, "c = ");
     219        mpfr_fdump (stderr, tc);
     220        fprintf (stderr, "OldMul: ");
     221        mpfr_fdump (stderr, ta);
     222        fprintf (stderr, "NewMul: ");
     223        mpfr_fdump (stderr, a);
     224        fprintf (stderr, "OldMul: ternary = %2d, flags =", inexact2);
     225        flags_fout (stderr, flags2);
     226        fprintf (stderr, "NewMul: ternary = %2d, flags =", inexact1);
     227        flags_fout (stderr, flags1);
     228        MPFR_ASSERTN(0);
     229      }
     230  
     231    mpfr_clears (ta, tb, tc, (mpfr_ptr) 0);
     232    return inexact1;
     233  }
     234  
     235  # define mpfr_mul mpfr_mul2
     236  
     237  #endif  /* MPFR_WANT_ASSERT >= 2 */
     238  
     239  /****** END OF CHECK *******/
     240  
     241  /* Multiply 2 mpfr_t */
     242  
     243  #if !defined(MPFR_GENERIC_ABI)
     244  
      245  /* Disabled for now since mul_1_extracted.c is not formally proven yet.
      246     Once it is proven, replace MPFR_WANT_PROVEN_CODExxx with MPFR_WANT_PROVEN_CODE. */
     247  #if defined(MPFR_WANT_PROVEN_CODExxx) && GMP_NUMB_BITS == 64 && \
     248    UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
     249    _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == _MPFR_PREC_FORMAT
     250  
     251  /* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
     252     has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
     253     which has 64 bits exactly. */
     254  
     255  #include "mul_1_extracted.c"
     256  
     257  #else
     258  
     259  /* Special code for prec(a) < GMP_NUMB_BITS and
     260     prec(b), prec(c) <= GMP_NUMB_BITS.
      261     Note: this code was copied into sqr.c, function mpfr_sqr_1 (this saves a few
      262     cycles compared to exporting this function). As a consequence, any change
      263     here should also be applied to mpfr_sqr_1. */
     264  static int
     265  mpfr_mul_1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
     266              mpfr_prec_t p)
     267  {
     268    mp_limb_t a0;
     269    mpfr_limb_ptr ap = MPFR_MANT(a);
     270    mp_limb_t b0 = MPFR_MANT(b)[0];
     271    mp_limb_t c0 = MPFR_MANT(c)[0];
     272    mpfr_exp_t ax;
     273    mpfr_prec_t sh = GMP_NUMB_BITS - p;
     274    mp_limb_t rb, sb, mask = MPFR_LIMB_MASK(sh);
     275  
     276    /* When prec(b), prec(c) <= GMP_NUMB_BITS / 2, we could replace umul_ppmm
     277       by a limb multiplication as follows, but we assume umul_ppmm is as fast
     278       as a limb multiplication on modern processors:
     279        a0 = (b0 >> (GMP_NUMB_BITS / 2)) * (c0 >> (GMP_NUMB_BITS / 2));
     280        sb = 0;
     281    */
     282    ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
     283    umul_ppmm (a0, sb, b0, c0);
     284    if (a0 < MPFR_LIMB_HIGHBIT)
     285      {
     286        ax --;
     287        /* TODO: This is actually an addition with carry (no shifts and no OR
     288           needed in asm). Make sure that GCC generates optimized code once
     289           it supports carry-in. */
     290        a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
     291        sb <<= 1;
     292      }
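             /* Round/sticky extraction below, illustrated under the assumption of
                64-bit limbs and a target precision p = 53 (so sh = 11 and
                mask = 0x7ff): rb is bit sh-1 = 10 of a0, the most significant
                discarded bit, while sb accumulates every lower discarded bit of a0
                together with the low product limb already held in sb. */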
     293    rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
     294    sb |= (a0 & mask) ^ rb;
     295    ap[0] = a0 & ~mask;
     296  
     297    MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     298  
     299    /* rounding */
     300    if (MPFR_UNLIKELY(ax > __gmpfr_emax))
     301      return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     302  
     303    /* Warning: underflow should be checked *after* rounding, thus when rounding
     304       away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     305       a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
     306    if (MPFR_UNLIKELY(ax < __gmpfr_emin))
     307      {
     308        if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB(~mask) &&
     309            ((rnd_mode == MPFR_RNDN && rb) ||
     310             (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
     311          goto rounding; /* no underflow */
     312        /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
     313           we have to change to RNDZ. This corresponds to:
     314           (a) either ax < emin - 1
     315           (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
     316        if (rnd_mode == MPFR_RNDN &&
     317            (ax < __gmpfr_emin - 1 ||
     318             (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
     319          rnd_mode = MPFR_RNDZ;
     320        return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
     321      }
     322  
     323   rounding:
     324    MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
     325                          in the cases "goto rounding" above. */
     326    if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
     327      {
     328        MPFR_ASSERTD(ax >= __gmpfr_emin);
     329        MPFR_RET (0);
     330      }
     331    else if (rnd_mode == MPFR_RNDN)
     332      {
     333        if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
     334          goto truncate;
     335        else
     336          goto add_one_ulp;
     337      }
     338    else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
     339      {
     340      truncate:
     341        MPFR_ASSERTD(ax >= __gmpfr_emin);
     342        MPFR_RET(-MPFR_SIGN(a));
     343      }
     344    else /* round away from zero */
     345      {
     346      add_one_ulp:
     347        ap[0] += MPFR_LIMB_ONE << sh;
     348        if (ap[0] == 0)
     349          {
     350            ap[0] = MPFR_LIMB_HIGHBIT;
     351            if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
     352              return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     353            MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
     354            MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
     355            MPFR_SET_EXP (a, ax + 1);
     356          }
     357        MPFR_RET(MPFR_SIGN(a));
     358      }
     359  }
     360  
     361  #endif /* MPFR_WANT_PROVEN_CODE */
     362  
     363  /* Special code for prec(a) = GMP_NUMB_BITS and
     364     prec(b), prec(c) <= GMP_NUMB_BITS. */
     365  static int
     366  mpfr_mul_1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
     367  {
     368    mp_limb_t a0;
     369    mpfr_limb_ptr ap = MPFR_MANT(a);
     370    mp_limb_t b0 = MPFR_MANT(b)[0];
     371    mp_limb_t c0 = MPFR_MANT(c)[0];
     372    mpfr_exp_t ax;
     373    mp_limb_t rb, sb;
     374  
     375    ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
     376    umul_ppmm (a0, sb, b0, c0);
     377    if (a0 < MPFR_LIMB_HIGHBIT)
     378      {
     379        ax --;
     380        /* TODO: This is actually an addition with carry (no shifts and no OR
     381           needed in asm). Make sure that GCC generates optimized code once
     382           it supports carry-in. */
     383        a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
     384        sb <<= 1;
     385      }
     386    rb = sb & MPFR_LIMB_HIGHBIT;
     387    sb = sb & ~MPFR_LIMB_HIGHBIT;
     388    ap[0] = a0;
     389  
     390    MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     391  
     392    /* rounding */
     393    if (MPFR_UNLIKELY(ax > __gmpfr_emax))
     394      return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     395  
     396    /* Warning: underflow should be checked *after* rounding, thus when rounding
     397       away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     398       a >= 0.111...111[1]*2^(emin-1), there is no underflow.
     399       Note: this case can only occur when the initial a0 (after the umul_ppmm
     400       call above) had its most significant bit 0, since the largest a0 is
     401       obtained for b0 = c0 = B-1 where B=2^GMP_NUMB_BITS, thus b0*c0 <= (B-1)^2
     402       thus a0 <= B-2. */
     403    if (MPFR_UNLIKELY(ax < __gmpfr_emin))
     404      {
     405        if (ax == __gmpfr_emin - 1 && ap[0] == ~MPFR_LIMB_ZERO &&
     406            ((rnd_mode == MPFR_RNDN && rb) ||
     407             (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
     408          goto rounding; /* no underflow */
     409        /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
     410           we have to change to RNDZ. This corresponds to:
     411           (a) either ax < emin - 1
     412           (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
     413        if (rnd_mode == MPFR_RNDN &&
     414            (ax < __gmpfr_emin - 1 ||
     415             (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
     416          rnd_mode = MPFR_RNDZ;
     417        return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
     418      }
     419  
     420   rounding:
     421    MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
     422                          in the cases "goto rounding" above. */
     423    if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
     424      {
     425        MPFR_ASSERTD(ax >= __gmpfr_emin);
     426        MPFR_RET (0);
     427      }
     428    else if (rnd_mode == MPFR_RNDN)
     429      {
     430        if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
     431          goto truncate;
     432        else
     433          goto add_one_ulp;
     434      }
     435    else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
     436      {
     437      truncate:
     438        MPFR_ASSERTD(ax >= __gmpfr_emin);
     439        MPFR_RET(-MPFR_SIGN(a));
     440      }
     441    else /* round away from zero */
     442      {
     443      add_one_ulp:
     444        ap[0] += MPFR_LIMB_ONE;
     445        if (ap[0] == 0)
     446          {
     447            ap[0] = MPFR_LIMB_HIGHBIT;
     448            if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
     449              return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     450            MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
     451            MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
     452            MPFR_SET_EXP (a, ax + 1);
     453          }
     454        MPFR_RET(MPFR_SIGN(a));
     455      }
     456  }
     457  
     458  /* Special code for GMP_NUMB_BITS < prec(a) < 2*GMP_NUMB_BITS and
     459     GMP_NUMB_BITS < prec(b), prec(c) <= 2*GMP_NUMB_BITS.
      460     Note: this code was copied into sqr.c, function mpfr_sqr_2 (this saves a few
      461     cycles compared to exporting this function). As a consequence, any change
      462     here should also be applied to mpfr_sqr_2. */
     463  static int
     464  mpfr_mul_2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
     465              mpfr_prec_t p)
     466  {
     467    mp_limb_t h, l, u, v, w;
     468    mpfr_limb_ptr ap = MPFR_MANT(a);
     469    mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
     470    mpfr_prec_t sh = 2 * GMP_NUMB_BITS - p;
     471    mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
     472    mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
     473  
     474    /* we store the 4-limb product in h=ap[1], l=ap[0], sb=ap[-1], sb2=ap[-2] */
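             /* (Schoolbook view of the computation below: with B = 2^GMP_NUMB_BITS,
                b = bp[1]*B + bp[0] and c = cp[1]*B + cp[0], the exact product is
                  b*c = bp[1]*cp[1]*B^2 + (bp[1]*cp[0] + bp[0]*cp[1])*B + bp[0]*cp[0].
                The code accumulates bp[1]*cp[1] and the high halves of the two cross
                products into {h, l}; the low halves v, w and bp[0]*cp[0] contribute
                at most a few ulps to {h, l} and are only accumulated later, into
                sb/sb2 with carry propagation, when the quick test further down
                cannot guarantee correct rounding.) */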
     475    umul_ppmm (h, l, bp[1], cp[1]);
     476    umul_ppmm (u, v, bp[1], cp[0]);
     477    l += u;
     478    h += (l < u);
     479    umul_ppmm (u, w, bp[0], cp[1]);
     480    l += u;
     481    h += (l < u);
     482  
     483    /* now the full product is {h, l, v + w + high(b0*c0), low(b0*c0)},
      484       where the lower part contributes less than 3 ulps to {h, l} */
     485  
     486    /* If h has its most significant bit set and the low sh-1 bits of l are not
     487       000...000 nor 111...111 nor 111...110, then we can round correctly;
     488       if h has zero as most significant bit, we have to shift left h and l,
     489       thus if the low sh-2 bits are not 000...000 nor 111...111 nor 111...110,
     490       then we can round correctly. To avoid an extra test we consider the latter
     491       case (if we can round, we can also round in the former case).
     492       For sh <= 3, we have mask <= 7, thus (mask>>2) <= 1, and the approximation
     493       cannot be enough. */
     494    if (MPFR_LIKELY(((l + 2) & (mask >> 2)) > 2))
     495      sb = sb2 = 1; /* result cannot be exact in that case */
     496    else
     497      {
     498        umul_ppmm (sb, sb2, bp[0], cp[0]);
     499        /* the full product is {h, l, sb + v + w, sb2} */
     500        sb += v;
     501        l += (sb < v);
     502        h += (l == 0) && (sb < v);
     503        sb += w;
     504        l += (sb < w);
     505        h += (l == 0) && (sb < w);
     506      }
     507    if (h < MPFR_LIMB_HIGHBIT)
     508      {
     509        ax --;
     510        h = (h << 1) | (l >> (GMP_NUMB_BITS - 1));
     511        l = (l << 1) | (sb >> (GMP_NUMB_BITS - 1));
     512        sb <<= 1;
     513        /* no need to shift sb2 since we only want to know if it is zero or not */
     514      }
     515    ap[1] = h;
     516    rb = l & (MPFR_LIMB_ONE << (sh - 1));
     517    sb |= ((l & mask) ^ rb) | sb2;
     518    ap[0] = l & ~mask;
     519  
     520    MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     521  
     522    /* rounding */
     523    if (MPFR_UNLIKELY(ax > __gmpfr_emax))
     524      return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     525  
     526    /* Warning: underflow should be checked *after* rounding, thus when rounding
     527       away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     528       a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
     529    if (MPFR_UNLIKELY(ax < __gmpfr_emin))
     530      {
     531        if (ax == __gmpfr_emin - 1 &&
     532            ap[1] == MPFR_LIMB_MAX &&
     533            ap[0] == MPFR_LIMB(~mask) &&
     534            ((rnd_mode == MPFR_RNDN && rb) ||
     535             (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
     536          goto rounding; /* no underflow */
     537        /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
     538           we have to change to RNDZ */
     539        if (rnd_mode == MPFR_RNDN &&
     540            (ax < __gmpfr_emin - 1 ||
     541             (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0 && (rb | sb) == 0)))
     542              rnd_mode = MPFR_RNDZ;
     543        return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
     544      }
     545  
     546   rounding:
     547    MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
     548                          in the cases "goto rounding" above. */
     549    if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
     550      {
     551        MPFR_ASSERTD(ax >= __gmpfr_emin);
     552        MPFR_RET (0);
     553      }
     554    else if (rnd_mode == MPFR_RNDN)
     555      {
     556        if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
     557          goto truncate;
     558        else
     559          goto add_one_ulp;
     560      }
     561    else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
     562      {
     563      truncate:
     564        MPFR_ASSERTD(ax >= __gmpfr_emin);
     565        MPFR_RET(-MPFR_SIGN(a));
     566      }
     567    else /* round away from zero */
     568      {
     569      add_one_ulp:
     570        ap[0] += MPFR_LIMB_ONE << sh;
     571        ap[1] += (ap[0] == 0);
     572        if (ap[1] == 0)
     573          {
     574            ap[1] = MPFR_LIMB_HIGHBIT;
     575            if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
     576              return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     577            MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
     578            MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
     579            MPFR_SET_EXP (a, ax + 1);
     580          }
     581        MPFR_RET(MPFR_SIGN(a));
     582      }
     583  }
     584  
     585  /* Special code for 2*GMP_NUMB_BITS < prec(a) < 3*GMP_NUMB_BITS and
     586     2*GMP_NUMB_BITS < prec(b), prec(c) <= 3*GMP_NUMB_BITS. */
     587  static int
     588  mpfr_mul_3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
     589              mpfr_prec_t p)
     590  {
     591    mp_limb_t a0, a1, a2, h, l, cy;
     592    mpfr_limb_ptr ap = MPFR_MANT(a);
     593    mpfr_exp_t ax = MPFR_GET_EXP(b) + MPFR_GET_EXP(c);
     594    mpfr_prec_t sh = 3 * GMP_NUMB_BITS - p;
     595    mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
     596    mp_limb_t *bp = MPFR_MANT(b), *cp = MPFR_MANT(c);
     597  
     598    /* we store the upper 3-limb product in a2, a1, a0:
     599       b2*c2, b2*c1+b1*c2, b2*c0+b1*c1+b0*c2 */
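             /* (For reference, with B = 2^GMP_NUMB_BITS and bi = bp[i], ci = cp[i],
                the exact product is
                  b*c = b2*c2*B^4 + (b2*c1 + b1*c2)*B^3 + (b2*c0 + b1*c1 + b0*c2)*B^2
                        + (b1*c0 + b0*c1)*B + b0*c0.
                The code keeps the B^4 and B^3 coefficients in full but only the high
                halves of the B^2 terms; the ignored pieces are what the "less than
                5 ulps" error bound further down accounts for.) */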
     600    umul_ppmm (a2, a1, bp[2], cp[2]);
     601    umul_ppmm (h, a0, bp[2], cp[1]);
     602    a1 += h;
     603    a2 += (a1 < h);
     604    umul_ppmm (h, l, bp[1], cp[2]);
     605    a1 += h;
     606    a2 += (a1 < h);
     607    a0 += l;
     608    cy = a0 < l; /* carry in a1 */
     609    umul_ppmm (h, l, bp[2], cp[0]);
     610    a0 += h;
     611    cy += (a0 < h);
     612    umul_ppmm (h, l, bp[1], cp[1]);
     613    a0 += h;
     614    cy += (a0 < h);
     615    umul_ppmm (h, l, bp[0], cp[2]);
     616    a0 += h;
     617    cy += (a0 < h);
     618    /* now propagate cy */
     619    a1 += cy;
     620    a2 += (a1 < cy);
     621  
     622    /* Now the approximate product {a2, a1, a0} has an error of less than
     623       5 ulps (3 ulps for the ignored low limbs of b2*c0+b1*c1+b0*c2,
     624       plus 2 ulps for the ignored b1*c0+b0*c1 (plus b0*c0)).
     625       Since we might shift by 1 bit, we make sure the low sh-2 bits of a0
     626       are not 0, -1, -2, -3 or -4. */
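             /* (This is what the test below checks: adding 4 maps the residues
                -4, ..., 0 modulo 2^(sh-2) onto 0, ..., 4, so requiring
                ((a0 + 4) & (mask >> 2)) > 4 excludes exactly those five values.
                When sh - 2 is too small for the test to ever succeed, we simply
                fall through to the exact 6-limb product, which is always safe.) */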
     627  
     628    if (MPFR_LIKELY(((a0 + 4) & (mask >> 2)) > 4))
     629      sb = sb2 = 1; /* result cannot be exact in that case */
     630    else
     631      {
     632        mp_limb_t p[6];
     633        mpn_mul_n (p, bp, cp, 3);
     634        a2 = p[5];
     635        a1 = p[4];
     636        a0 = p[3];
     637        sb = p[2];
     638        sb2 = p[1] | p[0];
     639      }
     640    if (a2 < MPFR_LIMB_HIGHBIT)
     641      {
     642        ax --;
     643        a2 = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
     644        a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
     645        a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
     646        sb <<= 1;
     647        /* no need to shift sb2: we only need to know if it is zero or not */
     648      }
     649    ap[2] = a2;
     650    ap[1] = a1;
     651    rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
     652    sb |= ((a0 & mask) ^ rb) | sb2;
     653    ap[0] = a0 & ~mask;
     654  
     655    MPFR_SIGN(a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     656  
     657    /* rounding */
     658    if (MPFR_UNLIKELY(ax > __gmpfr_emax))
     659      return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     660  
     661    /* Warning: underflow should be checked *after* rounding, thus when rounding
     662       away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     663       a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
     664    if (MPFR_UNLIKELY(ax < __gmpfr_emin))
     665      {
     666        if (ax == __gmpfr_emin - 1 &&
     667            ap[2] == MPFR_LIMB_MAX &&
     668            ap[1] == MPFR_LIMB_MAX &&
     669            ap[0] == MPFR_LIMB(~mask) &&
     670            ((rnd_mode == MPFR_RNDN && rb) ||
     671             (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
     672          goto rounding; /* no underflow */
     673        /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
     674           we have to change to RNDZ */
     675        if (rnd_mode == MPFR_RNDN &&
     676            (ax < __gmpfr_emin - 1 ||
     677             (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0
     678              && (rb | sb) == 0)))
     679          rnd_mode = MPFR_RNDZ;
     680        return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
     681      }
     682  
     683   rounding:
     684    MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
     685                          in the cases "goto rounding" above. */
     686    if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
     687      {
     688        MPFR_ASSERTD(ax >= __gmpfr_emin);
     689        MPFR_RET (0);
     690      }
     691    else if (rnd_mode == MPFR_RNDN)
     692      {
     693        if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
     694          goto truncate;
     695        else
     696          goto add_one_ulp;
     697      }
     698    else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
     699      {
     700      truncate:
     701        MPFR_ASSERTD(ax >= __gmpfr_emin);
     702        MPFR_RET(-MPFR_SIGN(a));
     703      }
     704    else /* round away from zero */
     705      {
     706      add_one_ulp:
     707        ap[0] += MPFR_LIMB_ONE << sh;
     708        ap[1] += (ap[0] == 0);
     709        ap[2] += (ap[1] == 0) && (ap[0] == 0);
     710        if (ap[2] == 0)
     711          {
     712            ap[2] = MPFR_LIMB_HIGHBIT;
     713            if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
     714              return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
     715            MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
     716            MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
     717            MPFR_SET_EXP (a, ax + 1);
     718          }
     719        MPFR_RET(MPFR_SIGN(a));
     720      }
     721  }
     722  
     723  #endif /* !defined(MPFR_GENERIC_ABI) */
     724  
     725  /* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD,
     726     in order to use Mulders' mulhigh, which is handled only here
     727     to avoid partial code duplication. There is some overhead due
     728     to the additional tests, but slowdown should not be noticeable
     729     as this code is not executed in very small precisions. */
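           /* Illustrative usage sketch: a minimal way a caller typically uses the
              public mpfr_mul entry point defined below, via the documented API only
              (mpfr_init2, mpfr_set_ui, mpfr_mul, mpfr_clears). The ternary return
              value is 0 when the stored result equals b*c exactly, positive when it
              was rounded up, and negative when it was rounded down:
           
                mpfr_t x, y, z;
                int inex;
                mpfr_init2 (x, 100);
                mpfr_init2 (y, 100);
                mpfr_init2 (z, 100);
                mpfr_set_ui (x, 3, MPFR_RNDN);
                mpfr_set_ui (y, 7, MPFR_RNDN);
                inex = mpfr_mul (z, x, y, MPFR_RNDN);
                (here inex == 0, since 3*7 = 21 is exactly representable in 100 bits)
                mpfr_clears (x, y, z, (mpfr_ptr) 0);
           */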
     730  
     731  MPFR_HOT_FUNCTION_ATTR int
     732  mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
     733  {
     734    int sign, inexact;
     735    mpfr_exp_t ax, ax2;
     736    mp_limb_t *tmp;
     737    mp_limb_t b1;
     738    mpfr_prec_t aq, bq, cq;
     739    mp_size_t bn, cn, tn, k, threshold;
     740    MPFR_TMP_DECL (marker);
     741  
     742    MPFR_LOG_FUNC
     743      (("b[%Pd]=%.*Rg c[%Pd]=%.*Rg rnd=%d",
     744        mpfr_get_prec (b), mpfr_log_prec, b,
     745        mpfr_get_prec (c), mpfr_log_prec, c, rnd_mode),
     746       ("a[%Pd]=%.*Rg inexact=%d",
     747        mpfr_get_prec (a), mpfr_log_prec, a, inexact));
     748  
     749    /* deal with special cases */
     750    if (MPFR_ARE_SINGULAR (b, c))
     751      {
     752        if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
     753          {
     754            MPFR_SET_NAN (a);
     755            MPFR_RET_NAN;
     756          }
     757        sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     758        if (MPFR_IS_INF (b))
     759          {
     760            if (!MPFR_IS_ZERO (c))
     761              {
     762                MPFR_SET_SIGN (a, sign);
     763                MPFR_SET_INF (a);
     764                MPFR_RET (0);
     765              }
     766            else
     767              {
     768                MPFR_SET_NAN (a);
     769                MPFR_RET_NAN;
     770              }
     771          }
     772        else if (MPFR_IS_INF (c))
     773          {
     774            if (!MPFR_IS_ZERO (b))
     775              {
     776                MPFR_SET_SIGN (a, sign);
     777                MPFR_SET_INF (a);
     778                MPFR_RET(0);
     779              }
     780            else
     781              {
     782                MPFR_SET_NAN (a);
     783                MPFR_RET_NAN;
     784              }
     785          }
     786        else
     787          {
     788            MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
     789            MPFR_SET_SIGN (a, sign);
     790            MPFR_SET_ZERO (a);
     791            MPFR_RET (0);
     792          }
     793      }
     794  
     795    aq = MPFR_GET_PREC (a);
     796    bq = MPFR_GET_PREC (b);
     797    cq = MPFR_GET_PREC (c);
     798  
     799  #if !defined(MPFR_GENERIC_ABI)
     800    if (aq == bq && aq == cq)
     801      {
     802        if (aq < GMP_NUMB_BITS)
     803          return mpfr_mul_1 (a, b, c, rnd_mode, aq);
     804  
     805        if (GMP_NUMB_BITS < aq && aq < 2 * GMP_NUMB_BITS)
     806          return mpfr_mul_2 (a, b, c, rnd_mode, aq);
     807  
     808        if (aq == GMP_NUMB_BITS)
     809          return mpfr_mul_1n (a, b, c, rnd_mode);
     810  
     811        if (2 * GMP_NUMB_BITS < aq && aq < 3 * GMP_NUMB_BITS)
     812          return mpfr_mul_3 (a, b, c, rnd_mode, aq);
     813      }
     814  #endif
     815  
     816    sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
     817  
     818    ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
     819    /* Note: the exponent of the exact result will be e = bx + cx + ec with
     820       ec in {-1,0,1} and the following assumes that e is representable. */
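             /* (Concretely: both mantissas lie in [1/2, 1), so the exact product lies
                in [1/4, 1), giving ec = -1 or 0 for the exact result; rounding up can
                then increase the final exponent by one more.) */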
     821  
      822    /* FIXME: Is this useful, given that we do an exponent check afterwards?
      823     * It is useful iff the precision is large, there is an overflow,
      824     * and we are doing further multiplications... */
     825  #ifdef HUGE
     826    if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
     827      return mpfr_overflow (a, rnd_mode, sign);
     828    if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
     829      return mpfr_underflow (a, rnd_mode == MPFR_RNDN ? MPFR_RNDZ : rnd_mode,
     830                             sign);
     831  #endif
     832  
     833    MPFR_ASSERTN ((mpfr_uprec_t) bq + cq <= MPFR_PREC_MAX);
     834  
     835    bn = MPFR_PREC2LIMBS (bq); /* number of limbs of b */
     836    cn = MPFR_PREC2LIMBS (cq); /* number of limbs of c */
      837    k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
     838    tn = MPFR_PREC2LIMBS (bq + cq);
     839    MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */
     840  
     841    /* Check for no size_t overflow. */
     842    MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / MPFR_BYTES_PER_MP_LIMB);
     843    MPFR_TMP_MARK (marker);
     844    tmp = MPFR_TMP_LIMBS_ALLOC (k);
     845  
      846    /* multiply the two mantissas in temporarily allocated space */
     847    if (MPFR_UNLIKELY (bn < cn))
     848      {
     849        mpfr_srcptr z = b;
     850        mp_size_t zn  = bn;
     851        b = c;
     852        bn = cn;
     853        c = z;
     854        cn = zn;
     855      }
     856    MPFR_ASSERTD (bn >= cn);
     857    if (bn <= 2)
     858      {
     859        /* The 3 cases perform the same first operation. */
     860        umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
     861        if (bn == 1)
     862          {
     863            /* 1 limb * 1 limb */
     864            b1 = tmp[1];
     865          }
     866        else if (MPFR_UNLIKELY (cn == 1))
     867          {
     868            /* 2 limbs * 1 limb */
     869            mp_limb_t t;
     870            umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
     871            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
     872            b1 = tmp[2];
     873          }
     874        else
     875          {
     876            /* 2 limbs * 2 limbs */
     877            mp_limb_t t1, t2, t3;
     878            /* First 2 limbs * 1 limb */
     879            umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
     880            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
     881            /* Second, the other 2 limbs * 1 limb product */
     882            umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
     883            umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
     884            add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
     885            /* Sum those two partial products */
     886            add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
     887            tmp[3] += (tmp[2] < t1);
     888            b1 = tmp[3];
     889          }
     890        b1 >>= (GMP_NUMB_BITS - 1);
     891        tmp += k - tn;
     892        if (MPFR_UNLIKELY (b1 == 0))
     893          mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
     894      }
     895    else /* bn >= cn and bn >= 3 */
     896      /* Mulders' mulhigh. This code can also be used via mpfr_sqr,
     897         hence the tests b != c. */
     898      if (MPFR_UNLIKELY (cn > (threshold = b != c ?
     899                               MPFR_MUL_THRESHOLD : MPFR_SQR_THRESHOLD)))
     900        {
     901          mp_limb_t *bp, *cp;
     902          mp_size_t n;
     903          mpfr_prec_t p;
     904  
     905          /* First check if we can reduce the precision of b or c:
     906             exact values are a nightmare for the short product trick */
     907          bp = MPFR_MANT (b);
     908          cp = MPFR_MANT (c);
     909          MPFR_STAT_STATIC_ASSERT (MPFR_MUL_THRESHOLD >= 1 &&
     910                                   MPFR_SQR_THRESHOLD >= 1);
     911          if (MPFR_UNLIKELY ((bp[0] == 0 && bp[1] == 0) ||
     912                             (cp[0] == 0 && cp[1] == 0)))
     913            {
     914              mpfr_t b_tmp, c_tmp;
     915  
     916              MPFR_TMP_FREE (marker);
     917              /* Check for b */
     918              while (*bp == 0)
     919                {
     920                  bp++;
     921                  bn--;
     922                  MPFR_ASSERTD (bn > 0);
     923                } /* This must end since the most significant limb is != 0 */
     924  
     925              /* Check for c too: if b == c, this will do nothing */
     926              while (*cp == 0)
     927                {
     928                  cp++;
     929                  cn--;
     930                  MPFR_ASSERTD (cn > 0);
     931                } /* This must end since the most significant limb is != 0 */
     932  
     933              /* It is not the fastest way, but it is safer. */
     934              MPFR_SET_SAME_SIGN (b_tmp, b);
     935              MPFR_SET_EXP (b_tmp, MPFR_GET_EXP (b));
     936              MPFR_PREC (b_tmp) = bn * GMP_NUMB_BITS;
     937              MPFR_MANT (b_tmp) = bp;
     938  
     939              if (b != c)
     940                {
     941                  MPFR_SET_SAME_SIGN (c_tmp, c);
     942                  MPFR_SET_EXP (c_tmp, MPFR_GET_EXP (c));
     943                  MPFR_PREC (c_tmp) = cn * GMP_NUMB_BITS;
     944                  MPFR_MANT (c_tmp) = cp;
     945  
     946                  /* Call again mpfr_mul with the fixed arguments */
     947                  return mpfr_mul (a, b_tmp, c_tmp, rnd_mode);
     948                }
     949              else
     950                /* Call mpfr_mul instead of mpfr_sqr as the precision
     951                   is probably still high enough. It is thus better to call
     952                   mpfr_mul again, but it should not give an infinite loop
     953                   if we call mpfr_sqr. */
     954                return mpfr_mul (a, b_tmp, b_tmp, rnd_mode);
     955            }
     956  
     957          /* Compute estimated precision of mulhigh.
     958             We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
      959           but is it worth it? */
     960          n = MPFR_LIMB_SIZE (a) + 1;
     961          n = MIN (n, cn);
     962          MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
     963          p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
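                   /* (Worked example, assuming GMP_NUMB_BITS == 64: for n = 4 this
                      gives p = 4*64 - ceil(log2(6)) = 256 - 3 = 253, i.e. about 3 of
                      the 256 computed bits are sacrificed to the error of Mulders'
                      short product.) */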
     964          bp += bn - n;
     965          cp += cn - n;
     966  
     967          /* Check if MulHigh can produce a roundable result.
     968             We may lose 1 bit due to RNDN, 1 due to final shift. */
     969          if (MPFR_UNLIKELY (aq > p - 5))
     970            {
     971              if (MPFR_UNLIKELY (aq > p - 5 + GMP_NUMB_BITS
     972                                 || bn <= threshold + 1))
     973                {
     974                  /* MulHigh can't produce a roundable result. */
     975                  MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
     976                                 aq, p));
     977                  goto full_multiply;
     978                }
      979              /* Add one extra limb to the mantissas of b and c. */
     980              if (bn > n)
     981                bp --;
     982              else
     983                {
     984                  bp = MPFR_TMP_LIMBS_ALLOC (n + 1);
     985                  bp[0] = 0;
     986                  MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
     987                }
     988              if (b != c)
     989                {
     990  #if GMP_NUMB_BITS <= 32
     991                  if (cn > n)
     992                    cp --; /* This can only happen on a 32-bit computer,
     993                              and is very unlikely to happen.
     994                              Indeed, since n = MIN (an + 1, cn), with
     995                              an = MPFR_LIMB_SIZE(a), we can have cn > n
     996                              only when n = an + 1 < cn.
     997                              We are in the case aq > p - 5, with
     998                              aq = PREC(a) = an*W - sh, with W = GMP_NUMB_BITS
     999                              and 0 <= sh < W, and p = n*W - ceil(log2(n+2)),
    1000                              thus an*W - sh > n*W - ceil(log2(n+2)) - 5.
    1001                              Thus n < an + (ceil(log2(n+2)) + 5 - sh)/W.
    1002                              To get n = an + 1, we need
    1003                              ceil(log2(n+2)) + 5 - sh > W, thus since sh>=0
    1004                              we need ceil(log2(n+2)) + 5 > W.
    1005                              With W=32 this can only happen for n>=2^27-1,
    1006                              thus for a precision of 2^32-64 for a,
    1007                              and with W=64 for n>=2^59-1, which would give
    1008                              a precision >= 2^64. */
    1009                  else
    1010  #endif
    1011                    {
    1012                      cp = MPFR_TMP_LIMBS_ALLOC (n + 1);
    1013                      cp[0] = 0;
    1014                      MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
    1015                    }
    1016                }
    1017              /* We will compute with one extra limb */
    1018              n++;
    1019              /* ceil(log2(n+2)) takes into account the lost bits due to
    1020                 Mulders' short product */
    1021              p = n * GMP_NUMB_BITS - MPFR_INT_CEIL_LOG2 (n + 2);
    1022              /* Due to some nasty reasons we can have only 4 bits */
    1023              MPFR_ASSERTD (aq <= p - 4);
    1024  
    1025              if (MPFR_LIKELY (k < 2*n))
    1026                {
    1027                  tmp = MPFR_TMP_LIMBS_ALLOC (2 * n);
    1028                  tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
    1029                }
    1030            }
    1031          MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", aq, p));
    1032          /* Compute an approximation of the product of b and c */
    1033          if (b != c)
    1034            mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
    1035          else
    1036            mpfr_sqrhigh_n (tmp + k - 2 * n, bp, n);
    1037          /* now tmp[k-n]..tmp[k-1] contains an approximation of the n upper
    1038             limbs of the product, with tmp[k-1] >= 2^(GMP_NUMB_BITS-2) */
    1039          b1 = tmp[k-1] >> (GMP_NUMB_BITS - 1); /* msb from the product */
    1040  
    1041          /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
    1042             then their product is in (1/4, 1/2] with probability 2*ln(2)-1
    1043             ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
    1044          if (MPFR_UNLIKELY (b1 == 0))
     1045          /* Warning: the mpfr_mulhigh_n call above is only guaranteed to
     1046             affect tmp[k-n-1..k-1], thus we shift only those limbs */
    1047            mpn_lshift (tmp + k - n - 1, tmp + k - n - 1, n + 1, 1);
    1048          tmp += k - tn;
    1049          /* now the approximation is in tmp[tn-n]...tmp[tn-1] */
    1050          MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);
    1051  
    1052          /* for RNDF, we simply use RNDZ, since anyway here we multiply numbers
    1053             with large precisions, thus the overhead of RNDZ is small */
    1054          if (rnd_mode == MPFR_RNDF)
    1055            rnd_mode = MPFR_RNDZ;
    1056  
    1057          /* if the most significant bit b1 is zero, we have only p-1 correct
    1058             bits */
    1059          if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p + b1 - 1,
    1060                                            aq + (rnd_mode == MPFR_RNDN))))
    1061            {
    1062              tmp -= k - tn; /* tmp may have changed, FIX IT!!!!! */
    1063              goto full_multiply;
    1064            }
    1065        }
    1066      else
    1067        {
    1068        full_multiply:
    1069          MPFR_LOG_MSG (("Use mpn_mul\n", 0));
    1070          b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);
    1071  
     1072          /* now tmp[0]..tmp[k-1] contains the product of both mantissas,
    1073             with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
    1074          b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */
    1075  
    1076          /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
    1077             then their product is in (1/4, 1/2] with probability 2*ln(2)-1
    1078             ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
    1079          tmp += k - tn;
    1080          if (MPFR_UNLIKELY (b1 == 0))
    1081            mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    1082        }
    1083  
    1084    /* b1 is 0 or 1 (most significant bit from the raw product) */
    1085    ax2 = ax + ((int) b1 - 1);
    1086    MPFR_RNDRAW (inexact, a, tmp, bq + cq, rnd_mode, sign, ax2++);
    1087    MPFR_TMP_FREE (marker);
    1088    MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Expo may be out of range */
    1089    MPFR_SET_SIGN (a, sign);
    1090    if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
    1091      return mpfr_overflow (a, rnd_mode, sign);
    1092    if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
    1093      {
    1094        /* In the rounding to the nearest mode, if the exponent of the exact
    1095           result (i.e. before rounding, i.e. without taking cc into account)
    1096           is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
    1097           both arguments are powers of 2), then round to zero. */
    1098        if (rnd_mode == MPFR_RNDN
    1099            && (ax + (mpfr_exp_t) b1 < __gmpfr_emin
    1100                || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
    1101          rnd_mode = MPFR_RNDZ;
    1102        return mpfr_underflow (a, rnd_mode, sign);
    1103      }
    1104    MPFR_RET (inexact);
    1105  }