1  /* Compute x * y + z as ternary operation.
       2     Copyright (C) 2011-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #define NO_MATH_REDIRECT
      20  #include <fenv.h>
      21  #include <float.h>
      22  #include <math.h>
      23  #include <math-barriers.h>
      24  #include <math_private.h>
      25  #include <fenv_private.h>
      26  #include <math-underflow.h>
      27  #include <math_ldbl_opt.h>
      28  #include <mul_split.h>
      29  #include <stdlib.h>
      30  
      31  /* Calculate X + Y exactly and store the result in *HI + *LO.  It is
      32     given that |X| >= |Y| and the values are small enough that no
      33     overflow occurs.  */
      34  
      35  static void
      36  add_split (double *hi, double *lo, double x, double y)
      37  {
      38    /* Apply Dekker's algorithm.  */
      39    *hi = x + y;
      40    *lo = (x - *hi) + y;
      41  }
      42  
      43  /* Value with extended range, used in intermediate computations.  */
      44  typedef struct
      45  {
      46    /* Value in [0.5, 1), as from frexp, or 0.  */
      47    double val;
      48    /* Exponent of power of 2 it is multiplied by, or 0 for zero.  */
      49    int exp;
      50  } ext_val;
      51  
      52  /* Store D as an ext_val value.  */
      53  
      54  static void
      55  store_ext_val (ext_val *v, double d)
      56  {
      57    v->val = __frexp (d, &v->exp);
      58  }
      59  
      60  /* Store X * Y as ext_val values *V0 and *V1.  */
      61  
      62  static void
      63  mul_ext_val (ext_val *v0, ext_val *v1, double x, double y)
      64  {
      65    int xexp, yexp;
      66    x = __frexp (x, &xexp);
      67    y = __frexp (y, &yexp);
      68    double hi, lo;
      69    mul_split (&hi, &lo, x, y);
      70    store_ext_val (v0, hi);
      71    if (hi != 0)
      72      v0->exp += xexp + yexp;
      73    store_ext_val (v1, lo);
      74    if (lo != 0)
      75      v1->exp += xexp + yexp;
      76  }
      77  
      78  /* Compare absolute values of ext_val values pointed to by P and Q for
      79     qsort.  */
      80  
      81  static int
      82  compare (const void *p, const void *q)
      83  {
      84    const ext_val *pe = p;
      85    const ext_val *qe = q;
      86    if (pe->val == 0)
      87      return qe->val == 0 ? 0 : -1;
      88    else if (qe->val == 0)
      89      return 1;
      90    else if (pe->exp < qe->exp)
      91      return -1;
      92    else if (pe->exp > qe->exp)
      93      return 1;
      94    else
      95      {
      96        double pd = fabs (pe->val);
      97        double qd = fabs (qe->val);
      98        if (pd < qd)
      99  	return -1;
     100        else if (pd == qd)
     101  	return 0;
     102        else
     103  	return 1;
     104      }
     105  }
     106  
     107  /* Calculate *X + *Y exactly, storing the high part in *X (rounded to
     108     nearest) and the low part in *Y.  It is given that |X| >= |Y|.  */
     109  
     110  static void
     111  add_split_ext (ext_val *x, ext_val *y)
     112  {
     113    int xexp = x->exp, yexp = y->exp;
     114    if (y->val == 0 || xexp - yexp > 53)
     115      return;
     116    double hi = x->val;
     117    double lo = __scalbn (y->val, yexp - xexp);
     118    add_split (&hi, &lo, hi, lo);
     119    store_ext_val (x, hi);
     120    if (hi != 0)
     121      x->exp += xexp;
     122    store_ext_val (y, lo);
     123    if (lo != 0)
     124      y->exp += xexp;
     125  }
     126  
     127  long double
     128  __fmal (long double x, long double y, long double z)
     129  {
     130    double xhi, xlo, yhi, ylo, zhi, zlo;
     131    int64_t hx, hy, hz;
     132    int xexp, yexp, zexp;
     133    double scale_val;
     134    int scale_exp;
     135    ldbl_unpack (x, &xhi, &xlo);
     136    EXTRACT_WORDS64 (hx, xhi);
     137    xexp = (hx & 0x7ff0000000000000LL) >> 52;
     138    ldbl_unpack (y, &yhi, &ylo);
     139    EXTRACT_WORDS64 (hy, yhi);
     140    yexp = (hy & 0x7ff0000000000000LL) >> 52;
     141    ldbl_unpack (z, &zhi, &zlo);
     142    EXTRACT_WORDS64 (hz, zhi);
     143    zexp = (hz & 0x7ff0000000000000LL) >> 52;
     144  
     145    /* If z is Inf or NaN, but x and y are finite, avoid any exceptions
     146       from computing x * y.  */
     147    if (zexp == 0x7ff && xexp != 0x7ff && yexp != 0x7ff)
     148      return (z + x) + y;
     149  
     150    /* If z is zero and x are y are nonzero, compute the result as x * y
     151       to avoid the wrong sign of a zero result if x * y underflows to
     152       0.  */
     153    if (z == 0 && x != 0 && y != 0)
     154      return x * y;
     155  
     156    /* If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y
     157       + z.  */
     158    if (xexp == 0x7ff || yexp == 0x7ff || zexp == 0x7ff
     159        || x == 0 || y == 0)
     160      return (x * y) + z;
     161  
     162    {
     163      SET_RESTORE_ROUND (FE_TONEAREST);
     164  
     165      ext_val vals[10];
     166      store_ext_val (&vals[0], zhi);
     167      store_ext_val (&vals[1], zlo);
     168      mul_ext_val (&vals[2], &vals[3], xhi, yhi);
     169      mul_ext_val (&vals[4], &vals[5], xhi, ylo);
     170      mul_ext_val (&vals[6], &vals[7], xlo, yhi);
     171      mul_ext_val (&vals[8], &vals[9], xlo, ylo);
     172      qsort (vals, 10, sizeof (ext_val), compare);
     173      /* Add up the values so that each element of VALS has absolute
     174         value at most equal to the last set bit of the next nonzero
     175         element.  */
     176      for (size_t i = 0; i <= 8; i++)
     177        {
     178  	add_split_ext (&vals[i + 1], &vals[i]);
     179  	qsort (vals + i + 1, 9 - i, sizeof (ext_val), compare);
     180        }
     181      /* Add up the values in the other direction, so that each element
     182         of VALS has absolute value less than 5ulp of the next
     183         value.  */
     184      size_t dstpos = 9;
     185      for (size_t i = 1; i <= 9; i++)
     186        {
     187  	if (vals[dstpos].val == 0)
     188  	  {
     189  	    vals[dstpos] = vals[9 - i];
     190  	    vals[9 - i].val = 0;
     191  	    vals[9 - i].exp = 0;
     192  	  }
     193  	else
     194  	  {
     195  	    add_split_ext (&vals[dstpos], &vals[9 - i]);
     196  	    if (vals[9 - i].val != 0)
     197  	      {
     198  		if (9 - i < dstpos - 1)
     199  		  {
     200  		    vals[dstpos - 1] = vals[9 - i];
     201  		    vals[9 - i].val = 0;
     202  		    vals[9 - i].exp = 0;
     203  		  }
     204  		dstpos--;
     205  	      }
     206  	  }
     207        }
     208      /* If the result is an exact zero, it results from adding two
     209         values with opposite signs; recompute in the original rounding
     210         mode.  */
     211      if (vals[9].val == 0)
     212        goto zero_out;
     213      /* Adding the top three values will now give a result as accurate
     214         as the underlying long double arithmetic.  */
     215      add_split_ext (&vals[9], &vals[8]);
     216      if (compare (&vals[8], &vals[7]) < 0)
     217        {
     218  	ext_val tmp = vals[7];
     219  	vals[7] = vals[8];
     220  	vals[8] = tmp;
     221        }
     222      add_split_ext (&vals[8], &vals[7]);
     223      add_split_ext (&vals[9], &vals[8]);
     224      if (vals[9].exp > DBL_MAX_EXP || vals[9].exp < DBL_MIN_EXP)
     225        {
     226  	/* Overflow or underflow, with the result depending on the
     227  	   original rounding mode, but not on the low part computed
     228  	   here.  */
     229  	scale_val = vals[9].val;
     230  	scale_exp = vals[9].exp;
     231  	goto scale_out;
     232        }
     233      double hi = __scalbn (vals[9].val, vals[9].exp);
     234      double lo = __scalbn (vals[8].val, vals[8].exp);
     235      /* It is possible that the low part became subnormal and was
     236         rounded so that the result is no longer canonical.  */
     237      ldbl_canonicalize (&hi, &lo);
     238      long double ret = ldbl_pack (hi, lo);
     239      math_check_force_underflow (ret);
     240      return ret;
     241    }
     242  
     243   scale_out:
     244    scale_val = math_opt_barrier (scale_val);
     245    scale_val = __scalbn (scale_val, scale_exp);
     246    if (fabs (scale_val) == DBL_MAX)
     247      return copysignl (LDBL_MAX, scale_val);
     248    math_check_force_underflow (scale_val);
     249    return scale_val;
     250  
     251   zero_out:;
     252    double zero = 0.0;
     253    zero = math_opt_barrier (zero);
     254    return zero - zero;
     255  }
     256  #if IS_IN (libm)
     257  long_double_symbol (libm, __fmal, fmal);
     258  #else
     259  long_double_symbol (libc, __fmal, fmal);
     260  #endif