1  /* Compute full X * Y for double type.
       2     Copyright (C) 2013-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifndef _MUL_SPLIT_H
      20  #define _MUL_SPLIT_H
      21  
      22  #include <float.h>
      23  
      24  /* Calculate X * Y exactly and store the result in *HI + *LO.  It is
      25     given that the values are small enough that no overflow occurs and
      26     large enough (or zero) that no underflow occurs.  */
      27  
      28  static void
      29  mul_split (double *hi, double *lo, double x, double y)
      30  {
      31  #ifdef __FP_FAST_FMA
      32    /* Fast built-in fused multiply-add.  */
      33    *hi = x * y;
      34    *lo = __builtin_fma (x, y, -*hi);
      35  #else
      36    /* Apply Dekker's algorithm.  */
      37    *hi = x * y;
      38  # define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
      39    double x1 = x * C;
      40    double y1 = y * C;
      41  # undef C
      42    x1 = (x - x1) + x1;
      43    y1 = (y - y1) + y1;
      44    double x2 = x - x1;
      45    double y2 = y - y1;
      46    *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
      47  #endif
      48  }
      49  
      50  /* Add a + b exactly, such that *hi + *lo = a + b.
      51     Assumes |a| >= |b| and rounding to nearest.  */
      52  static inline void
      53  fast_two_sum (double *hi, double *lo, double a, double b)
      54  {
      55    double e;
      56  
      57    *hi = a + b;
      58    e = *hi - a; /* exact  */
      59    *lo = b - e; /* exact  */
      60    /* Now *hi + *lo = a + b exactly.  */
      61  }
      62  
      63  /* Multiplication of two floating-point expansions: *hi + *lo is an
      64     approximation of (h1+l1)*(h2+l2), assuming |l1| <= 1/2*ulp(h1)
      65     and |l2| <= 1/2*ulp(h2) and rounding to nearest.  */
      66  static inline void
      67  mul_expansion (double *hi, double *lo, double h1, double l1,
      68  	       double h2, double l2)
      69  {
      70    double r, e;
      71  
      72    mul_split (hi, lo, h1, h2);
      73    r = h1 * l2 + h2 * l1;
      74    /* Now add r to (hi,lo) using fast two-sum, where we know |r| < |hi|.  */
      75    fast_two_sum (hi, &e, *hi, r);
      76    *lo -= e;
      77  }
      78  
      79  /* Calculate X / Y and store the approximate result in *HI + *LO.  It is
      80     assumed that Y is not zero, that no overflow nor underflow occurs, and
      81     rounding is to nearest.  */
      82  static inline void
      83  div_split (double *hi, double *lo, double x, double y)
      84  {
      85    double a, b;
      86  
      87    *hi = x / y;
      88    mul_split (&a, &b, *hi, y);
      89    /* a + b = hi*y, which should be near x.  */
      90    a = x - a; /* huge cancellation  */
      91    a = a - b;
      92    /* Now x ~ hi*y + a thus x/y ~ hi + a/y.  */
      93    *lo = a / y;
      94  }
      95  
      96  /* Division of two floating-point expansions: *hi + *lo is an
      97     approximation of (h1+l1)/(h2+l2), assuming |l1| <= 1/2*ulp(h1)
      98     and |l2| <= 1/2*ulp(h2), h2+l2 is not zero, and rounding to nearest.  */
      99  static inline void
     100  div_expansion (double *hi, double *lo, double h1, double l1,
     101  	       double h2, double l2)
     102  {
     103    double r, e;
     104  
     105    div_split (hi, lo, h1, h2);
     106    /* (h1+l1)/(h2+l2) ~ h1/h2 + (l1*h2 - l2*h1)/h2^2  */
     107    r = (l1 * h2 - l2 * h1) / (h2 * h2);
     108    /* Now add r to (hi,lo) using fast two-sum, where we know |r| < |hi|.  */
     109    fast_two_sum (hi, &e, *hi, r);
     110    *lo += e;
     111    /* Renormalize since |lo| might be larger than 0.5 ulp(hi).  */
     112    fast_two_sum (hi, lo, *hi, *lo);
     113  }
     114  
     115  #endif /* _MUL_SPLIT_H */