1  /* Compute x * y + z as ternary operation.
       2     Copyright (C) 2010-2018 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4     Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <http://www.gnu.org/licenses/>.  */
      19  
      20  #include "quadmath-imp.h"
      21  
      22  /* This implementation uses rounding to odd to avoid problems with
      23     double rounding.  See a paper by Boldo and Melquiond:
      24     http://www.lri.fr/~melquion/doc/08-tc.pdf  */
      25  
      26  __float128
      27  fmaq (__float128 x, __float128 y, __float128 z)
      28  {
      29    ieee854_float128 u, v, w;
      30    int adjust = 0;
      31    u.value = x;
      32    v.value = y;
      33    w.value = z;
      34    if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
      35  			>= 0x7fff + IEEE854_FLOAT128_BIAS
      36  			   - FLT128_MANT_DIG, 0)
      37        || __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      38        || __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      39        || __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
      40        || __builtin_expect (u.ieee.exponent + v.ieee.exponent
      41  			   <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))
      42      {
      43        /* If z is Inf, but x and y are finite, the result should be
      44  	 z rather than NaN.  */
      45        if (w.ieee.exponent == 0x7fff
      46  	  && u.ieee.exponent != 0x7fff
      47            && v.ieee.exponent != 0x7fff)
      48  	return (z + x) + y;
      49        /* If z is zero and x are y are nonzero, compute the result
      50  	 as x * y to avoid the wrong sign of a zero result if x * y
      51  	 underflows to 0.  */
      52        if (z == 0 && x != 0 && y != 0)
      53  	return x * y;
      54        /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
      55  	 x * y + z.  */
      56        if (u.ieee.exponent == 0x7fff
      57  	  || v.ieee.exponent == 0x7fff
      58  	  || w.ieee.exponent == 0x7fff
      59  	  || x == 0
      60  	  || y == 0)
      61  	return x * y + z;
      62        /* If fma will certainly overflow, compute as x * y.  */
      63        if (u.ieee.exponent + v.ieee.exponent
      64  	  > 0x7fff + IEEE854_FLOAT128_BIAS)
      65  	return x * y;
      66        /* If x * y is less than 1/4 of FLT128_TRUE_MIN, neither the
      67  	 result nor whether there is underflow depends on its exact
      68  	 value, only on its sign.  */
      69        if (u.ieee.exponent + v.ieee.exponent
      70  	  < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
      71  	{
      72  	  int neg = u.ieee.negative ^ v.ieee.negative;
      73  	  __float128 tiny = neg ? -0x1p-16494Q : 0x1p-16494Q;
      74  	  if (w.ieee.exponent >= 3)
      75  	    return tiny + z;
      76  	  /* Scaling up, adding TINY and scaling down produces the
      77  	     correct result, because in round-to-nearest mode adding
      78  	     TINY has no effect and in other modes double rounding is
      79  	     harmless.  But it may not produce required underflow
      80  	     exceptions.  */
      81  	  v.value = z * 0x1p114Q + tiny;
      82  	  if (TININESS_AFTER_ROUNDING
      83  	      ? v.ieee.exponent < 115
      84  	      : (w.ieee.exponent == 0
      85  		 || (w.ieee.exponent == 1
      86  		     && w.ieee.negative != neg
      87  		     && w.ieee.mantissa3 == 0
      88  		     && w.ieee.mantissa2 == 0
      89  		     && w.ieee.mantissa1 == 0
      90  		     && w.ieee.mantissa0 == 0)))
      91  	    {
      92  	      __float128 force_underflow = x * y;
      93  	      math_force_eval (force_underflow);
      94  	    }
      95  	  return v.value * 0x1p-114Q;
      96  	}
      97        if (u.ieee.exponent + v.ieee.exponent
      98  	  >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
      99  	{
     100  	  /* Compute 1p-113 times smaller result and multiply
     101  	     at the end.  */
     102  	  if (u.ieee.exponent > v.ieee.exponent)
     103  	    u.ieee.exponent -= FLT128_MANT_DIG;
     104  	  else
     105  	    v.ieee.exponent -= FLT128_MANT_DIG;
     106  	  /* If x + y exponent is very large and z exponent is very small,
     107  	     it doesn't matter if we don't adjust it.  */
     108  	  if (w.ieee.exponent > FLT128_MANT_DIG)
     109  	    w.ieee.exponent -= FLT128_MANT_DIG;
     110  	  adjust = 1;
     111  	}
     112        else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
     113  	{
     114  	  /* Similarly.
     115  	     If z exponent is very large and x and y exponents are
     116  	     very small, adjust them up to avoid spurious underflows,
     117  	     rather than down.  */
     118  	  if (u.ieee.exponent + v.ieee.exponent
     119  	      <= IEEE854_FLOAT128_BIAS + 2 * FLT128_MANT_DIG)
     120  	    {
     121  	      if (u.ieee.exponent > v.ieee.exponent)
     122  		u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
     123  	      else
     124  		v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
     125  	    }
     126  	  else if (u.ieee.exponent > v.ieee.exponent)
     127  	    {
     128  	      if (u.ieee.exponent > FLT128_MANT_DIG)
     129  		u.ieee.exponent -= FLT128_MANT_DIG;
     130  	    }
     131  	  else if (v.ieee.exponent > FLT128_MANT_DIG)
     132  	    v.ieee.exponent -= FLT128_MANT_DIG;
     133  	  w.ieee.exponent -= FLT128_MANT_DIG;
     134  	  adjust = 1;
     135  	}
     136        else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
     137  	{
     138  	  u.ieee.exponent -= FLT128_MANT_DIG;
     139  	  if (v.ieee.exponent)
     140  	    v.ieee.exponent += FLT128_MANT_DIG;
     141  	  else
     142  	    v.value *= 0x1p113Q;
     143  	}
     144        else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
     145  	{
     146  	  v.ieee.exponent -= FLT128_MANT_DIG;
     147  	  if (u.ieee.exponent)
     148  	    u.ieee.exponent += FLT128_MANT_DIG;
     149  	  else
     150  	    u.value *= 0x1p113Q;
     151  	}
     152        else /* if (u.ieee.exponent + v.ieee.exponent
     153  		  <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
     154  	{
     155  	  if (u.ieee.exponent > v.ieee.exponent)
     156  	    u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
     157  	  else
     158  	    v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
     159  	  if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 6)
     160  	    {
     161  	      if (w.ieee.exponent)
     162  		w.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
     163  	      else
     164  		w.value *= 0x1p228Q;
     165  	      adjust = -1;
     166  	    }
     167  	  /* Otherwise x * y should just affect inexact
     168  	     and nothing else.  */
     169  	}
     170        x = u.value;
     171        y = v.value;
     172        z = w.value;
     173      }
     174  
     175    /* Ensure correct sign of exact 0 + 0.  */
     176    if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
     177      {
     178        x = math_opt_barrier (x);
     179        return x * y + z;
     180      }
     181  
     182    fenv_t env;
     183    feholdexcept (&env);
     184    fesetround (FE_TONEAREST);
     185  
     186    /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
     187  #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
     188    __float128 x1 = x * C;
     189    __float128 y1 = y * C;
     190    __float128 m1 = x * y;
     191    x1 = (x - x1) + x1;
     192    y1 = (y - y1) + y1;
     193    __float128 x2 = x - x1;
     194    __float128 y2 = y - y1;
     195    __float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
     196  
     197    /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
     198    __float128 a1 = z + m1;
     199    __float128 t1 = a1 - z;
     200    __float128 t2 = a1 - t1;
     201    t1 = m1 - t1;
     202    t2 = z - t2;
     203    __float128 a2 = t1 + t2;
     204    /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
     205    math_force_eval (m2);
     206    math_force_eval (a2);
     207    feclearexcept (FE_INEXACT);
     208  
     209    /* If the result is an exact zero, ensure it has the correct sign.  */
     210    if (a1 == 0 && m2 == 0)
     211      {
     212        feupdateenv (&env);
     213        /* Ensure that round-to-nearest value of z + m1 is not reused.  */
     214        z = math_opt_barrier (z);
     215        return z + m1;
     216      }
     217  
     218    fesetround (FE_TOWARDZERO);
     219    /* Perform m2 + a2 addition with round to odd.  */
     220    u.value = a2 + m2;
     221  
     222    if (__glibc_likely (adjust == 0))
     223      {
     224        if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
     225  	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
     226        feupdateenv (&env);
     227        /* Result is a1 + u.value.  */
     228        return a1 + u.value;
     229      }
     230    else if (__glibc_likely (adjust > 0))
     231      {
     232        if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
     233  	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
     234        feupdateenv (&env);
     235        /* Result is a1 + u.value, scaled up.  */
     236        return (a1 + u.value) * 0x1p113Q;
     237      }
     238    else
     239      {
     240        if ((u.ieee.mantissa3 & 1) == 0)
     241  	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
     242        v.value = a1 + u.value;
     243        /* Ensure the addition is not scheduled after fetestexcept call.  */
     244        math_force_eval (v.value);
     245        int j = fetestexcept (FE_INEXACT) != 0;
     246        feupdateenv (&env);
     247        /* Ensure the following computations are performed in default rounding
     248  	 mode instead of just reusing the round to zero computation.  */
     249        asm volatile ("" : "=m" (u) : "m" (u));
     250        /* If a1 + u.value is exact, the only rounding happens during
     251  	 scaling down.  */
     252        if (j == 0)
     253  	return v.value * 0x1p-228Q;
     254        /* If result rounded to zero is not subnormal, no double
     255  	 rounding will occur.  */
     256        if (v.ieee.exponent > 228)
     257  	return (a1 + u.value) * 0x1p-228Q;
     258        /* If v.value * 0x1p-228L with round to zero is a subnormal above
     259  	 or equal to FLT128_MIN / 2, then v.value * 0x1p-228L shifts mantissa
     260  	 down just by 1 bit, which means v.ieee.mantissa3 |= j would
     261  	 change the round bit, not sticky or guard bit.
     262  	 v.value * 0x1p-228L never normalizes by shifting up,
     263  	 so round bit plus sticky bit should be already enough
     264  	 for proper rounding.  */
     265        if (v.ieee.exponent == 228)
     266  	{
     267  	  /* If the exponent would be in the normal range when
     268  	     rounding to normal precision with unbounded exponent
     269  	     range, the exact result is known and spurious underflows
     270  	     must be avoided on systems detecting tininess after
     271  	     rounding.  */
     272  	  if (TININESS_AFTER_ROUNDING)
     273  	    {
     274  	      w.value = a1 + u.value;
     275  	      if (w.ieee.exponent == 229)
     276  		return w.value * 0x1p-228Q;
     277  	    }
     278  	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
     279  	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
     280  	     bit.  */
     281  	  w.value = 0;
     282  	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
     283  	  w.ieee.negative = v.ieee.negative;
     284  	  v.ieee.mantissa3 &= ~3U;
     285  	  v.value *= 0x1p-228Q;
     286  	  w.value *= 0x1p-2Q;
     287  	  return v.value + w.value;
     288  	}
     289        v.ieee.mantissa3 |= j;
     290        return v.value * 0x1p-228Q;
     291      }
     292  }