(root)/
mpfr-4.2.1/
src/
fma.c
       1  /* mpfr_fma -- Floating multiply-add
       2  
       3  Copyright 2001-2002, 2004, 2006-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  /* The fused-multiply-add (fma) of x, y and z is defined by:
      27     fma(x,y,z)= x*y + z
      28  */
      29  
      30  /* this function deals with all cases where inputs are singular, i.e.,
      31     either NaN, Inf or zero */
      32  static int
      33  mpfr_fma_singular (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
      34                     mpfr_rnd_t rnd_mode)
      35  {
      36    if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
      37      {
      38        MPFR_SET_NAN(s);
      39        MPFR_RET_NAN;
      40      }
      41    /* now neither x, y or z is NaN */
      42    else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
      43      {
      44        /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
      45        if ((MPFR_IS_ZERO(y)) ||
      46            (MPFR_IS_ZERO(x)) ||
      47            (MPFR_IS_INF(z) &&
      48             ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
      49          {
      50            MPFR_SET_NAN(s);
      51            MPFR_RET_NAN;
      52          }
      53        else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
      54          {
      55            MPFR_SET_INF(s);
      56            MPFR_SET_SAME_SIGN(s, z);
      57            MPFR_RET(0);
      58          }
      59        else /* z is finite */
      60          {
      61            MPFR_SET_INF(s);
      62            MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y)));
      63            MPFR_RET(0);
      64          }
      65      }
      66    /* now x and y are finite */
      67    else if (MPFR_IS_INF(z))
      68      {
      69        MPFR_SET_INF(s);
      70        MPFR_SET_SAME_SIGN(s, z);
      71        MPFR_RET(0);
      72      }
      73    else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
      74      {
      75        if (MPFR_IS_ZERO(z))
      76          {
      77            int sign_p;
      78            sign_p = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
      79            MPFR_SET_SIGN(s, (rnd_mode != MPFR_RNDD ?
      80                              (MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z) ?
      81                               MPFR_SIGN_NEG : MPFR_SIGN_POS) :
      82                              (MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z) ?
      83                               MPFR_SIGN_POS : MPFR_SIGN_NEG)));
      84            MPFR_SET_ZERO(s);
      85            MPFR_RET(0);
      86          }
      87        else
      88          return mpfr_set (s, z, rnd_mode);
      89      }
      90    else /* necessarily z is zero here */
      91      {
      92        MPFR_ASSERTD(MPFR_IS_ZERO(z));
      93        return (x == y) ? mpfr_sqr (s, x, rnd_mode)
      94          : mpfr_mul (s, x, y, rnd_mode);
      95      }
      96  }
      97  
/* s <- x*y + z, computed as if with a single rounding at the end
   (fused multiply-add).  Returns the usual MPFR inexact ternary value. */
int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;                   /* holds the exact product x*y */
  mp_size_t n;
  mpfr_exp_t e;
  mpfr_prec_t precx, precy;
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg y[%Pd]=%.*Rg  z[%Pd]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases: delegate as soon as any input is NaN, Inf or 0 */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) || MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    return mpfr_fma_singular (s, x, y, z, rnd_mode);

  /* The exact product x*y has exponent e or e-1 (the leading bit of
     the limb product may need one normalization shift). */
  e = MPFR_GET_EXP (x) + MPFR_GET_EXP (y);

  precx = MPFR_PREC (x);
  precy = MPFR_PREC (y);

  /* First deal with special case where prec(x) = prec(y) and x*y does
     not overflow nor underflow. Do it only for small sizes since for large
     sizes x*y is faster using Mulders' algorithm (as a rule of thumb,
     we assume mpn_mul_n is faster up to 4*MPFR_MUL_THRESHOLD).
     Since |EXP(x)|, |EXP(y)| < 2^(k-2) on a k-bit computer,
     |EXP(x)+EXP(y)| < 2^(k-1), thus cannot overflow nor underflow. */
  if (precx == precy && e <= __gmpfr_emax && e > __gmpfr_emin)
    {
      /* Single-limb fast path: all four numbers have the same precision
         precx < GMP_NUMB_BITS, so the exact product fits in 2 limbs. */
      if (precx < GMP_NUMB_BITS &&
          MPFR_PREC(z) == precx &&
          MPFR_PREC(s) == precx)
        {
          mp_limb_t umant[2], zmant[2];
          mpfr_t zz;
          int inex;

          /* exact 2-limb product of the single-limb significands */
          umul_ppmm (umant[1], umant[0], MPFR_MANT(x)[0], MPFR_MANT(y)[0]);
          /* u and zz are built by hand on the stack: u holds the exact
             product, zz is z widened to 2*precx bits.  Note that u's
             mantissa pointer is redirected to the local buffer umant. */
          MPFR_PREC(u) = MPFR_PREC(zz) = 2 * precx;
          MPFR_MANT(u) = umant;
          MPFR_MANT(zz) = zmant;
          MPFR_SIGN(u) = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
          MPFR_SIGN(zz) = MPFR_SIGN(z);
          MPFR_EXP(zz) = MPFR_EXP(z);
          if (MPFR_PREC(zz) <= GMP_NUMB_BITS) /* zz fits in one limb */
            {
              /* Normalize the product into the single significant limb
                 umant[0]; shift left by 1 when the top bit is clear. */
              if ((umant[1] & MPFR_LIMB_HIGHBIT) == 0)
                {
                  umant[0] = umant[1] << 1;
                  MPFR_EXP(u) = e - 1;
                }
              else
                {
                  umant[0] = umant[1];
                  MPFR_EXP(u) = e;
                }
              zmant[0] = MPFR_MANT(z)[0];
            }
          else
            {
              /* zz needs two limbs: z's significand goes into the high
                 limb, the low limb is zero padding. */
              zmant[1] = MPFR_MANT(z)[0];
              zmant[0] = MPFR_LIMB_ZERO;
              if ((umant[1] & MPFR_LIMB_HIGHBIT) == 0)
                {
                  /* normalize the 2-limb product by a left shift of 1 */
                  umant[1] = (umant[1] << 1) |
                    (umant[0] >> (GMP_NUMB_BITS - 1));
                  umant[0] = umant[0] << 1;
                  MPFR_EXP(u) = e - 1;
                }
              else
                MPFR_EXP(u) = e;
            }
          /* Round u + zz to 2*precx bits; the final rounding from
             2*precx to precx bits is done by mpfr_set_1_2. */
          inex = mpfr_add (u, u, zz, rnd_mode);
          /* mpfr_set_1_2 requires PREC(u) = 2*PREC(s),
             thus we need PREC(s) = PREC(x) = PREC(y) = PREC(z) */
          return mpfr_set_1_2 (s, u, rnd_mode, inex);
        }
      else if ((n = MPFR_LIMB_SIZE(x)) <= 4 * MPFR_MUL_THRESHOLD)
        {
          mpfr_limb_ptr up;
          mp_size_t un = n + n;   /* exact product needs 2n limbs */
          MPFR_TMP_DECL(marker);

          MPFR_TMP_MARK(marker);
          /* give u a temporary un-limb mantissa on the TMP stack */
          MPFR_TMP_INIT (up, u, un * GMP_NUMB_BITS, un);
          up = MPFR_MANT(u);
          /* multiply x*y exactly into u */
          if (x == y)
            mpn_sqr (up, MPFR_MANT(x), n);
          else
            mpn_mul_n (up, MPFR_MANT(x), MPFR_MANT(y), n);
          /* normalize: the limb product may lack its leading bit */
          if (MPFR_LIMB_MSB (up[un - 1]) == 0)
            {
              mpn_lshift (up, up, un, 1);
              MPFR_EXP(u) = e - 1;
            }
          else
            MPFR_EXP(u) = e;
          MPFR_SIGN(u) = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
          /* The above code does not generate any exception.
             The exceptions will come only from mpfr_add. */
          inexact = mpfr_add (s, u, z, rnd_mode);
          MPFR_TMP_FREE(marker);
          return inexact;
        }
    }

  /* Generic case.
     If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_ASSERTN (precx + precy <= MPFR_PREC_MAX);
  MPFR_GROUP_INIT_1 (group, precx + precy, u);
  MPFR_SAVE_EXPO_MARK (expo);

  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */

      if (MPFR_IS_INF (u))  /* overflow */
        {
          int sign_u = MPFR_SIGN (u);

          MPFR_LOG_MSG (("Overflow on x*y\n", 0));
          MPFR_GROUP_CLEAR (group);  /* we no longer need u */

          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| > 2^emax. This case
             is also an overflow. */
          if (sign_u == MPFR_SIGN (z) || e >= __gmpfr_emax + 3)
            {
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, sign_u);
            }
        }
      else  /* underflow: one has |x*y| < 2^(emin-1). */
        {
          MPFR_LOG_MSG (("Underflow on x*y\n", 0));

          /* Easy cases: when 2^(emin-1) <= 1/2 * min(ulp(z),ulp(s)),
             one can replace x*y by sign(x*y) * 2^(emin-1). Note that
             this is even true in case of equality for MPFR_RNDN thanks
             to the even-rounding rule.
             The + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1. */
          if (MPFR_GET_EXP (z) - __gmpfr_emin >=
              MAX (MPFR_PREC (z), MPFR_PREC (s) + 1))
            {
              /* Replace the underflowed product by the smallest positive
                 number 2^(emin-1) with the product's sign, then jump into
                 the normal addition path below (the label is inside the
                 else branch; this cross-branch goto is intentional). */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
              mpfr_clear_flags ();
              goto add;
            }

          MPFR_GROUP_CLEAR (group);  /* we no longer need u */
        }

      /* Let's use UBF to resolve the overflow/underflow issues.
         (UBF = unbounded floats: the product is computed exactly with
         an exponent outside the normal range, and mpfr_add takes care
         of the final rounding and exception generation.) */
      {
        mpfr_ubf_t uu;
        mp_size_t un;
        mpfr_limb_ptr up;
        MPFR_TMP_DECL(marker);

        MPFR_LOG_MSG (("Use UBF\n", 0));

        MPFR_TMP_MARK (marker);
        un = MPFR_LIMB_SIZE (x) + MPFR_LIMB_SIZE (y);
        MPFR_TMP_INIT (up, uu, (mpfr_prec_t) un * GMP_NUMB_BITS, un);
        mpfr_ubf_mul_exact (uu, x, y);
        mpfr_clear_flags ();
        inexact = mpfr_add (s, (mpfr_srcptr) uu, z, rnd_mode);
        MPFR_UBF_CLEAR_EXP (uu);
        MPFR_TMP_FREE (marker);
      }
    }
  else
    {
      /* Normal path: u holds the exact product x*y (or, when reached
         via "goto add", sign(x*y) * 2^(emin-1) standing in for an
         underflowed product). */
    add:
      inexact = mpfr_add (s, u, z, rnd_mode);
      MPFR_GROUP_CLEAR (group);
    }

  /* Propagate the flags raised under the extended exponent range,
     restore the caller's range, and round/check s against it. */
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}