1  /* Quad-precision floating point e^x.
       2     Copyright (C) 1999-2018 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4     Contributed by Jakub Jelinek <jj@ultra.linux.cz>
       5     Partly based on double-precision code
       6     by Geoffrey Keating <geoffk@ozemail.com.au>
       7  
       8     The GNU C Library is free software; you can redistribute it and/or
       9     modify it under the terms of the GNU Lesser General Public
      10     License as published by the Free Software Foundation; either
      11     version 2.1 of the License, or (at your option) any later version.
      12  
      13     The GNU C Library is distributed in the hope that it will be useful,
      14     but WITHOUT ANY WARRANTY; without even the implied warranty of
      15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16     Lesser General Public License for more details.
      17  
      18     You should have received a copy of the GNU Lesser General Public
      19     License along with the GNU C Library; if not, see
      20     <http://www.gnu.org/licenses/>.  */
      21  
      22  /* The basic design here is from
      23     Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
      24     Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
      25     pp. 410-423.
      26  
      27     We work with number pairs where the first number is the high part and
      28     the second one is the low part. Arithmetic with the high part numbers must
      29     be exact, without any roundoff errors.
      30  
      31     The input value, X, is written as
      32     X = n * ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x
      33         - n * ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl
      34  
      35     where:
      36     - n is an integer, 16384 >= n >= -16495;
      37     - ln(2)_0 is the first 93 bits of ln(2), and |ln(2)_0-ln(2)-ln(2)_1| < 2^-205
      38     - t1 is an integer, 89 >= t1 >= -89
      39     - t2 is an integer, 65 >= t2 >= -65
      40     - |arg1[t1]-t1/256.0| < 2^-53
      41     - |arg2[t2]-t2/32768.0| < 2^-53
      42     - x + xl is whatever is left, |x + xl| < 2^-16 + 2^-53
      43  
      44     Then e^x is approximated as
      45  
      46     e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
      47  	       + 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
      48  		 * p (x + xl + n * ln(2)_1))
      49     where:
      50     - p(x) is a polynomial approximating e(x)-1
      51     - e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
      52     - e^(arg2[t2]_0 + arg2[t2]_1) likewise
      53     - n_1 + n_0 = n, so that |n_0| < -FLT128_MIN_EXP-1.
      54  
      55     If it happens that n_1 == 0 (this is the usual case), that multiplication
      56     is omitted.
      57     */
      58  
      59  #ifndef _GNU_SOURCE
      60  #define _GNU_SOURCE
      61  #endif
      62  
      63  #include "quadmath-imp.h"
      64  #include "expq_table.h"
      65  
      66  static const __float128 C[] = {
      67  /* Smallest integer x for which e^x overflows.  */
      68  #define himark C[0]
      69   11356.523406294143949491931077970765Q,
      70  
      71  /* Largest integer x for which e^x underflows.  */
      72  #define lomark C[1]
      73  -11433.4627433362978788372438434526231Q,
      74  
      75  /* 3x2^96 */
      76  #define THREEp96 C[2]
      77   59421121885698253195157962752.0Q,
      78  
      79  /* 3x2^103 */
      80  #define THREEp103 C[3]
      81   30423614405477505635920876929024.0Q,
      82  
      83  /* 3x2^111 */
      84  #define THREEp111 C[4]
      85   7788445287802241442795744493830144.0Q,
      86  
      87  /* 1/ln(2) */
      88  #define M_1_LN2 C[5]
      89   1.44269504088896340735992468100189204Q,
      90  
      91  /* first 93 bits of ln(2) */
      92  #define M_LN2_0 C[6]
      93   0.693147180559945309417232121457981864Q,
      94  
      95  /* ln2_0 - ln(2) */
      96  #define M_LN2_1 C[7]
      97  -1.94704509238074995158795957333327386E-31Q,
      98  
      99  /* very small number */
     100  #define TINY C[8]
     101   1.0e-4900Q,
     102  
     103  /* 2^16383 */
     104  #define TWO16383 C[9]
     105   5.94865747678615882542879663314003565E+4931Q,
     106  
     107  /* 256 */
     108  #define TWO8 C[10]
     109   256,
     110  
     111  /* 32768 */
     112  #define TWO15 C[11]
     113   32768,
     114  
     115  /* Chebyshev polynom coefficients for (exp(x)-1)/x */
     116  #define P1 C[12]
     117  #define P2 C[13]
     118  #define P3 C[14]
     119  #define P4 C[15]
     120  #define P5 C[16]
     121  #define P6 C[17]
     122   0.5Q,
     123   1.66666666666666666666666666666666683E-01Q,
     124   4.16666666666666666666654902320001674E-02Q,
     125   8.33333333333333333333314659767198461E-03Q,
     126   1.38888888889899438565058018857254025E-03Q,
     127   1.98412698413981650382436541785404286E-04Q,
     128  };
     129  
     130  __float128
     131  expq (__float128 x)
     132  {
     133    /* Check for usual case.  */
     134    if (__builtin_isless (x, himark) && __builtin_isgreater (x, lomark))
     135      {
     136        int tval1, tval2, unsafe, n_i;
     137        __float128 x22, n, t, result, xl;
     138        ieee854_float128 ex2_u, scale_u;
     139        fenv_t oldenv;
     140  
     141        feholdexcept (&oldenv);
     142  #ifdef FE_TONEAREST
     143        fesetround (FE_TONEAREST);
     144  #endif
     145  
     146        /* Calculate n.  */
     147        n = x * M_1_LN2 + THREEp111;
     148        n -= THREEp111;
     149        x = x - n * M_LN2_0;
     150        xl = n * M_LN2_1;
     151  
     152        /* Calculate t/256.  */
     153        t = x + THREEp103;
     154        t -= THREEp103;
     155  
     156        /* Compute tval1 = t.  */
     157        tval1 = (int) (t * TWO8);
     158  
     159        x -= __expq_table[T_EXPL_ARG1+2*tval1];
     160        xl -= __expq_table[T_EXPL_ARG1+2*tval1+1];
     161  
     162        /* Calculate t/32768.  */
     163        t = x + THREEp96;
     164        t -= THREEp96;
     165  
     166        /* Compute tval2 = t.  */
     167        tval2 = (int) (t * TWO15);
     168  
     169        x -= __expq_table[T_EXPL_ARG2+2*tval2];
     170        xl -= __expq_table[T_EXPL_ARG2+2*tval2+1];
     171  
     172        x = x + xl;
     173  
     174        /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]).  */
     175        ex2_u.value = __expq_table[T_EXPL_RES1 + tval1]
     176  		* __expq_table[T_EXPL_RES2 + tval2];
     177        n_i = (int)n;
     178        /* 'unsafe' is 1 iff n_1 != 0.  */
     179        unsafe = abs(n_i) >= 15000;
     180        ex2_u.ieee.exponent += n_i >> unsafe;
     181  
     182        /* Compute scale = 2^n_1.  */
     183        scale_u.value = 1;
     184        scale_u.ieee.exponent += n_i - (n_i >> unsafe);
     185  
     186        /* Approximate e^x2 - 1, using a seventh-degree polynomial,
     187  	 with maximum error in [-2^-16-2^-53,2^-16+2^-53]
     188  	 less than 4.8e-39.  */
     189        x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
     190        math_force_eval (x22);
     191  
     192        /* Return result.  */
     193        fesetenv (&oldenv);
     194  
     195        result = x22 * ex2_u.value + ex2_u.value;
     196  
     197        /* Now we can test whether the result is ultimate or if we are unsure.
     198  	 In the later case we should probably call a mpn based routine to give
     199  	 the ultimate result.
     200  	 Empirically, this routine is already ultimate in about 99.9986% of
     201  	 cases, the test below for the round to nearest case will be false
     202  	 in ~ 99.9963% of cases.
     203  	 Without proc2 routine maximum error which has been seen is
     204  	 0.5000262 ulp.
     205  
     206  	  ieee854_float128 ex3_u;
     207  
     208  	  #ifdef FE_TONEAREST
     209  	    fesetround (FE_TONEAREST);
     210  	  #endif
     211  	  ex3_u.value = (result - ex2_u.value) - x22 * ex2_u.value;
     212  	  ex2_u.value = result;
     213  	  ex3_u.ieee.exponent += FLT128_MANT_DIG + 15 + IEEE854_FLOAT128_BIAS
     214  				 - ex2_u.ieee.exponent;
     215  	  n_i = abs (ex3_u.value);
     216  	  n_i = (n_i + 1) / 2;
     217  	  fesetenv (&oldenv);
     218  	  #ifdef FE_TONEAREST
     219  	  if (fegetround () == FE_TONEAREST)
     220  	    n_i -= 0x4000;
     221  	  #endif
     222  	  if (!n_i) {
     223  	    return __ieee754_expl_proc2 (origx);
     224  	  }
     225         */
     226        if (!unsafe)
     227  	return result;
     228        else
     229  	{
     230  	  result *= scale_u.value;
     231  	  math_check_force_underflow_nonneg (result);
     232  	  return result;
     233  	}
     234      }
     235    /* Exceptional cases:  */
     236    else if (__builtin_isless (x, himark))
     237      {
     238        if (isinfq (x))
     239  	/* e^-inf == 0, with no error.  */
     240  	return 0;
     241        else
     242  	/* Underflow */
     243  	return TINY * TINY;
     244      }
     245    else
     246      /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
     247      return TWO16383*x;
     248  }