1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /* Long double expansions are
      13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      14    and are incorporated herein by permission of the author.  The author
      15    reserves the right to distribute this material elsewhere under different
      16    copying permissions.  These modifications are distributed here under
      17    the following terms:
      18  
      19      This library is free software; you can redistribute it and/or
      20      modify it under the terms of the GNU Lesser General Public
      21      License as published by the Free Software Foundation; either
      22      version 2.1 of the License, or (at your option) any later version.
      23  
      24      This library is distributed in the hope that it will be useful,
      25      but WITHOUT ANY WARRANTY; without even the implied warranty of
      26      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      27      Lesser General Public License for more details.
      28  
      29      You should have received a copy of the GNU Lesser General Public
      30      License along with this library; if not, see
      31      <https://www.gnu.org/licenses/>.  */
      32  
      33  /* double erf(double x)
      34   * double erfc(double x)
      35   *			     x
      36   *		      2      |\
      37   *     erf(x)  =  ---------  | exp(-t*t)dt
      38   *		   sqrt(pi) \|
      39   *			     0
      40   *
      41   *     erfc(x) =  1-erf(x)
      42   *  Note that
      43   *		erf(-x) = -erf(x)
      44   *		erfc(-x) = 2 - erfc(x)
      45   *
      46   * Method:
      47   *	1. For |x| in [0, 0.84375]
      48   *	    erf(x)  = x + x*R(x^2)
      49   *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
      50   *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
      51   *	   Remark. The formula is derived by noting
      52   *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
      53   *	   and that
      54   *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
      55   *	   is close to one. The interval is chosen because the fix
      56   *	   point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
      57   *	   near 0.6174), and by some experiment, 0.84375 is chosen to
      58   *	   guarantee the error is less than one ulp for erf.
      59   *
      60   *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
      61   *         c = 0.84506291151 rounded to single (24 bits)
      62   *	erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
      63   *	erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
      64   *			  1+(c+P1(s)/Q1(s))    if x < 0
      65   *	   Remark: here we use the taylor series expansion at x=1.
      66   *		erf(1+s) = erf(1) + s*Poly(s)
      67   *			 = 0.845.. + P1(s)/Q1(s)
      68   *	   Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
      69   *
      70   *      3. For x in [1.25,1/0.35(~2.857143)],
      71   *	erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
      72   *              z=1/x^2
      73   *	erf(x)  = 1 - erfc(x)
      74   *
      75   *      4. For x in [1/0.35,107]
      76   *	erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
      77   *			= 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
      78   *                             if -6.666<x<0
      79   *			= 2.0 - tiny		(if x <= -6.666)
      80   *              z=1/x^2
      81   *	erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
      82   *	erf(x)  = sign(x)*(1.0 - tiny)
      83   *      Note1:
      84   *	   To compute exp(-x*x-0.5625+R/S), let s be a single
      85   *	   precision number and s := x; then
      86   *		-x*x = -s*s + (s-x)*(s+x)
      87   *	        exp(-x*x-0.5626+R/S) =
      88   *			exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
      89   *      Note2:
      90   *	   Here 4 and 5 make use of the asymptotic series
      91   *			  exp(-x*x)
      92   *		erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
      93   *			  x*sqrt(pi)
      94   *
      95   *      5. For inf > x >= 107
      96   *	erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
      97   *	erfc(x) = tiny*tiny (raise underflow) if x > 0
      98   *			= 2 - tiny if x<0
      99   *
     100   *      7. Special case:
     101   *	erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
     102   *	erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
     103   *		erfc/erf(NaN) is NaN
     104   */
     105  
     106  
     107  #include <errno.h>
     108  #include <float.h>
     109  #include <math.h>
     110  #include <math_private.h>
     111  #include <math-underflow.h>
     112  #include <libm-alias-ldouble.h>
     113  
     114  static const long double
     115  tiny = 1e-4931L,
     116    half = 0.5L,
     117    one = 1.0L,
     118    two = 2.0L,
     119  	/* c = (float)0.84506291151 */
     120    erx = 0.845062911510467529296875L,
     121  /*
     122   * Coefficients for approximation to  erf on [0,0.84375]
     123   */
     124    /* 2/sqrt(pi) - 1 */
     125    efx = 1.2837916709551257389615890312154517168810E-1L,
     126  
     127    pp[6] = {
     128      1.122751350964552113068262337278335028553E6L,
     129      -2.808533301997696164408397079650699163276E6L,
     130      -3.314325479115357458197119660818768924100E5L,
     131      -6.848684465326256109712135497895525446398E4L,
     132      -2.657817695110739185591505062971929859314E3L,
     133      -1.655310302737837556654146291646499062882E2L,
     134    },
     135  
     136    qq[6] = {
     137      8.745588372054466262548908189000448124232E6L,
     138      3.746038264792471129367533128637019611485E6L,
     139      7.066358783162407559861156173539693900031E5L,
     140      7.448928604824620999413120955705448117056E4L,
     141      4.511583986730994111992253980546131408924E3L,
     142      1.368902937933296323345610240009071254014E2L,
     143      /* 1.000000000000000000000000000000000000000E0 */
     144    },
     145  
     146  /*
     147   * Coefficients for approximation to  erf  in [0.84375,1.25]
     148   */
     149  /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
     150     -0.15625 <= x <= +.25
     151     Peak relative error 8.5e-22  */
     152  
     153    pa[8] = {
     154      -1.076952146179812072156734957705102256059E0L,
     155       1.884814957770385593365179835059971587220E2L,
     156      -5.339153975012804282890066622962070115606E1L,
     157       4.435910679869176625928504532109635632618E1L,
     158       1.683219516032328828278557309642929135179E1L,
     159      -2.360236618396952560064259585299045804293E0L,
     160       1.852230047861891953244413872297940938041E0L,
     161       9.394994446747752308256773044667843200719E-2L,
     162    },
     163  
     164    qa[7] =  {
     165      4.559263722294508998149925774781887811255E2L,
     166      3.289248982200800575749795055149780689738E2L,
     167      2.846070965875643009598627918383314457912E2L,
     168      1.398715859064535039433275722017479994465E2L,
     169      6.060190733759793706299079050985358190726E1L,
     170      2.078695677795422351040502569964299664233E1L,
     171      4.641271134150895940966798357442234498546E0L,
     172      /* 1.000000000000000000000000000000000000000E0 */
     173    },
     174  
     175  /*
     176   * Coefficients for approximation to  erfc in [1.25,1/0.35]
     177   */
     178  /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
     179     1/2.85711669921875 < 1/x < 1/1.25
     180     Peak relative error 3.1e-21  */
     181  
     182      ra[] = {
     183        1.363566591833846324191000679620738857234E-1L,
     184        1.018203167219873573808450274314658434507E1L,
     185        1.862359362334248675526472871224778045594E2L,
     186        1.411622588180721285284945138667933330348E3L,
     187        5.088538459741511988784440103218342840478E3L,
     188        8.928251553922176506858267311750789273656E3L,
     189        7.264436000148052545243018622742770549982E3L,
     190        2.387492459664548651671894725748959751119E3L,
     191        2.220916652813908085449221282808458466556E2L,
     192      },
     193  
     194      sa[] = {
     195        -1.382234625202480685182526402169222331847E1L,
     196        -3.315638835627950255832519203687435946482E2L,
     197        -2.949124863912936259747237164260785326692E3L,
     198        -1.246622099070875940506391433635999693661E4L,
     199        -2.673079795851665428695842853070996219632E4L,
     200        -2.880269786660559337358397106518918220991E4L,
     201        -1.450600228493968044773354186390390823713E4L,
     202        -2.874539731125893533960680525192064277816E3L,
     203        -1.402241261419067750237395034116942296027E2L,
     204        /* 1.000000000000000000000000000000000000000E0 */
     205      },
     206  /*
     207   * Coefficients for approximation to  erfc in [1/.35,107]
     208   */
     209  /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
     210     1/6.6666259765625 < 1/x < 1/2.85711669921875
     211     Peak relative error 4.2e-22  */
     212      rb[] = {
     213        -4.869587348270494309550558460786501252369E-5L,
     214        -4.030199390527997378549161722412466959403E-3L,
     215        -9.434425866377037610206443566288917589122E-2L,
     216        -9.319032754357658601200655161585539404155E-1L,
     217        -4.273788174307459947350256581445442062291E0L,
     218        -8.842289940696150508373541814064198259278E0L,
     219        -7.069215249419887403187988144752613025255E0L,
     220        -1.401228723639514787920274427443330704764E0L,
     221      },
     222  
     223      sb[] = {
     224        4.936254964107175160157544545879293019085E-3L,
     225        1.583457624037795744377163924895349412015E-1L,
     226        1.850647991850328356622940552450636420484E0L,
     227        9.927611557279019463768050710008450625415E0L,
     228        2.531667257649436709617165336779212114570E1L,
     229        2.869752886406743386458304052862814690045E1L,
     230        1.182059497870819562441683560749192539345E1L,
     231        /* 1.000000000000000000000000000000000000000E0 */
     232      },
     233  /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
     234     1/107 <= 1/x <= 1/6.6666259765625
     235     Peak relative error 1.1e-21  */
     236      rc[] = {
     237        -8.299617545269701963973537248996670806850E-5L,
     238        -6.243845685115818513578933902532056244108E-3L,
     239        -1.141667210620380223113693474478394397230E-1L,
     240        -7.521343797212024245375240432734425789409E-1L,
     241        -1.765321928311155824664963633786967602934E0L,
     242        -1.029403473103215800456761180695263439188E0L,
     243      },
     244  
     245      sc[] = {
     246        8.413244363014929493035952542677768808601E-3L,
     247        2.065114333816877479753334599639158060979E-1L,
     248        1.639064941530797583766364412782135680148E0L,
     249        4.936788463787115555582319302981666347450E0L,
     250        5.005177727208955487404729933261347679090E0L,
     251        /* 1.000000000000000000000000000000000000000E0 */
     252      };
     253  
     254  long double
     255  __erfl (long double x)
     256  {
     257    long double R, S, P, Q, s, y, z, r;
     258    int32_t ix, i;
     259    uint32_t se, i0, i1;
     260  
     261    GET_LDOUBLE_WORDS (se, i0, i1, x);
     262    ix = se & 0x7fff;
     263  
     264    if (ix >= 0x7fff)
     265      {				/* erf(nan)=nan */
     266        i = ((se & 0xffff) >> 15) << 1;
     267        return (long double) (1 - i) + one / x;	/* erf(+-inf)=+-1 */
     268      }
     269  
     270    ix = (ix << 16) | (i0 >> 16);
     271    if (ix < 0x3ffed800) /* |x|<0.84375 */
     272      {
     273        if (ix < 0x3fde8000) /* |x|<2**-33 */
     274  	{
     275  	  if (ix < 0x00080000)
     276  	    {
     277  	      /* Avoid spurious underflow.  */
     278  	      long double ret = 0.0625 * (16.0 * x + (16.0 * efx) * x);
     279  	      math_check_force_underflow (ret);
     280  	      return ret;
     281  	    }
     282  	  return x + efx * x;
     283  	}
     284        z = x * x;
     285        r = pp[0] + z * (pp[1]
     286            + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
     287        s = qq[0] + z * (qq[1]
     288  	  + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
     289        y = r / s;
     290        return x + x * y;
     291      }
     292    if (ix < 0x3fffa000) /* 1.25 */
     293      {				/* 0.84375 <= |x| < 1.25 */
     294        s = fabsl (x) - one;
     295        P = pa[0] + s * (pa[1] + s * (pa[2]
     296          + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
     297        Q = qa[0] + s * (qa[1] + s * (qa[2]
     298          + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
     299        if ((se & 0x8000) == 0)
     300  	return erx + P / Q;
     301        else
     302  	return -erx - P / Q;
     303      }
     304    if (ix >= 0x4001d555) /* 6.6666259765625 */
     305      {				/* inf>|x|>=6.666 */
     306        if ((se & 0x8000) == 0)
     307  	return one - tiny;
     308        else
     309  	return tiny - one;
     310      }
     311    x = fabsl (x);
     312    s = one / (x * x);
     313    if (ix < 0x4000b6db) /* 2.85711669921875 */
     314      {
     315        R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
     316            s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
     317        S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
     318            s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
     319      }
     320    else
     321      {				/* |x| >= 1/0.35 */
     322        R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
     323           s * (rb[5] + s * (rb[6] + s * rb[7]))))));
     324        S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
     325           s * (sb[5] + s * (sb[6] + s))))));
     326      }
     327    z = x;
     328    GET_LDOUBLE_WORDS (i, i0, i1, z);
     329    i1 = 0;
     330    SET_LDOUBLE_WORDS (z, i, i0, i1);
     331    r =
     332      __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
     333  						     R / S);
     334    if ((se & 0x8000) == 0)
     335      return one - r / x;
     336    else
     337      return r / x - one;
     338  }
     339  
     340  libm_alias_ldouble (__erf, erf)
     341  long double
     342  __erfcl (long double x)
     343  {
     344    int32_t hx, ix;
     345    long double R, S, P, Q, s, y, z, r;
     346    uint32_t se, i0, i1;
     347  
     348    GET_LDOUBLE_WORDS (se, i0, i1, x);
     349    ix = se & 0x7fff;
     350    if (ix >= 0x7fff)
     351      {				/* erfc(nan)=nan */
     352        /* erfc(+-inf)=0,2 */
     353        return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
     354      }
     355  
     356    ix = (ix << 16) | (i0 >> 16);
     357    if (ix < 0x3ffed800) /* |x|<0.84375 */
     358      {
     359        if (ix < 0x3fbe0000) /* |x|<2**-65 */
     360  	return one - x;
     361        z = x * x;
     362        r = pp[0] + z * (pp[1]
     363            + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
     364        s = qq[0] + z * (qq[1]
     365  	  + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
     366        y = r / s;
     367        if (ix < 0x3ffd8000) /* x<1/4 */
     368  	{
     369  	  return one - (x + x * y);
     370  	}
     371        else
     372  	{
     373  	  r = x * y;
     374  	  r += (x - half);
     375  	  return half - r;
     376  	}
     377      }
     378    if (ix < 0x3fffa000) /* 1.25 */
     379      {				/* 0.84375 <= |x| < 1.25 */
     380        s = fabsl (x) - one;
     381        P = pa[0] + s * (pa[1] + s * (pa[2]
     382          + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
     383        Q = qa[0] + s * (qa[1] + s * (qa[2]
     384          + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
     385        if ((se & 0x8000) == 0)
     386  	{
     387  	  z = one - erx;
     388  	  return z - P / Q;
     389  	}
     390        else
     391  	{
     392  	  z = erx + P / Q;
     393  	  return one + z;
     394  	}
     395      }
     396    if (ix < 0x4005d600) /* 107 */
     397      {				/* |x|<107 */
     398        x = fabsl (x);
     399        s = one / (x * x);
     400        if (ix < 0x4000b6db) /* 2.85711669921875 */
     401  	{			/* |x| < 1/.35 ~ 2.857143 */
     402  	  R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
     403                s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
     404  	  S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
     405                s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
     406  	}
     407        else if (ix < 0x4001d555) /* 6.6666259765625 */
     408  	{			/* 6.666 > |x| >= 1/.35 ~ 2.857143 */
     409  	  R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
     410  	      s * (rb[5] + s * (rb[6] + s * rb[7]))))));
     411  	  S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
     412                s * (sb[5] + s * (sb[6] + s))))));
     413  	}
     414        else
     415  	{			/* |x| >= 6.666 */
     416  	  if (se & 0x8000)
     417  	    return two - tiny;	/* x < -6.666 */
     418  
     419  	  R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
     420  						    s * (rc[4] + s * rc[5]))));
     421  	  S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
     422  						    s * (sc[4] + s))));
     423  	}
     424        z = x;
     425        GET_LDOUBLE_WORDS (hx, i0, i1, z);
     426        i1 = 0;
     427        i0 &= 0xffffff00;
     428        SET_LDOUBLE_WORDS (z, hx, i0, i1);
     429        r = __ieee754_expl (-z * z - 0.5625) *
     430  	__ieee754_expl ((z - x) * (z + x) + R / S);
     431        if ((se & 0x8000) == 0)
     432  	{
     433  	  long double ret = r / x;
     434  	  if (ret == 0)
     435  	    __set_errno (ERANGE);
     436  	  return ret;
     437  	}
     438        else
     439  	return two - r / x;
     440      }
     441    else
     442      {
     443        if ((se & 0x8000) == 0)
     444  	{
     445  	  __set_errno (ERANGE);
     446  	  return tiny * tiny;
     447  	}
     448        else
     449  	return two - tiny;
     450      }
     451  }
     452  
     453  libm_alias_ldouble (__erfc, erfc)