1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /* Expansions and modifications for 128-bit long double are
      13     Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      14     and are incorporated herein by permission of the author.  The author
      15     reserves the right to distribute this material elsewhere under different
      16     copying permissions.  These modifications are distributed here under
      17     the following terms:
      18  
      19      This library is free software; you can redistribute it and/or
      20      modify it under the terms of the GNU Lesser General Public
      21      License as published by the Free Software Foundation; either
      22      version 2.1 of the License, or (at your option) any later version.
      23  
      24      This library is distributed in the hope that it will be useful,
      25      but WITHOUT ANY WARRANTY; without even the implied warranty of
      26      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      27      Lesser General Public License for more details.
      28  
      29      You should have received a copy of the GNU Lesser General Public
      30      License along with this library; if not, see
      31      <https://www.gnu.org/licenses/>.  */
      32  
      33  /* __ieee754_powl(x,y) return x**y
      34   *
      35   *		      n
      36   * Method:  Let x =  2   * (1+f)
      37   *	1. Compute and return log2(x) in two pieces:
      38   *		log2(x) = w1 + w2,
      39   *	   where w1 has 113-53 = 60 bit trailing zeros.
      40   *	2. Perform y*log2(x) = n+y' by simulating multi-precision
      41   *	   arithmetic, where |y'|<=0.5.
      42   *	3. Return x**y = 2**n*exp(y'*log2)
      43   *
      44   * Special cases:
      45   *	1.  (anything) ** 0  is 1
      46   *	2.  (anything) ** 1  is itself
      47   *	3.  (anything except 1) ** NAN is NAN
      48   *	4.  NAN ** (anything except 0) is NAN
      49   *	5.  +-(|x| > 1) **  +INF is +INF
      50   *	6.  +-(|x| > 1) **  -INF is +0
      51   *	7.  +-(|x| < 1) **  +INF is +0
      52   *	8.  +-(|x| < 1) **  -INF is +INF
      53   *	9.  +-1         ** +-INF is 1
      54   *	10. +0 ** (+anything except 0, NAN)               is +0
      55   *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
      56   *	12. +0 ** (-anything except 0, NAN)               is +INF
      57   *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
      58   *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
      59   *	15. +INF ** (+anything except 0,NAN) is +INF
      60   *	16. +INF ** (-anything except 0,NAN) is +0
      61   *	17. -INF ** (anything)  = -0 ** (-anything)
      62   *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
      63   *	19. (-anything except 0 and inf) ** (non-integer) is NAN
      64   *
      65   */
      66  
      67  #include <math.h>
      68  #include <math_private.h>
      69  #include <math-underflow.h>
      70  #include <libm-alias-finite.h>
      71  
      72  static const long double bp[] = {
            /* Base points for the log2 step of __ieee754_powl: after |x| is
               rescaled, the routine forms (ax - bp[k]) / (ax + bp[k]) with
               k = 0 or 1.  */
      73    1.0L,
      74    1.5L,
      75  };
      76  
      77  /* High part of log_2(bp[k]): 0 for bp[0] = 1.0 and the leading part of
             log_2(1.5) for bp[1] = 1.5.  __ieee754_powl adds dp_h[k] into the
             high part (t1) of log2(|x|).  */
      78  static const long double dp_h[] = {
      79    0.0,
      80    5.8496250072115607565592654282227158546448E-1L
      81  };
      82  
      83  /* Low part of log_2(bp[k]): 0 for bp[0] = 1.0 and the low part of
             log_2(1.5) for bp[1] = 1.5.  __ieee754_powl folds dp_l[k] into the
             low-order term z_l.  */
      84  static const long double dp_l[] = {
      85    0.0,
      86    1.0579781240112554492329533686862998106046E-16L
      87  };
      88  
      89  static const long double zero = 0.0L,
      90    one = 1.0L,
      91    two = 2.0L,
      92    two113 = 1.0384593717069655257060992658440192E34L,	/* 2^113; multiplies a subnormal |x| while n is reduced by 113 */
      93    huge = 1.0e300L,	/* sgn * huge * huge is returned on the overflow paths */
      94    tiny = 1.0e-300L;	/* sgn * tiny * tiny is returned on the underflow paths */
      95  
      96  /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
      97     z = (x-1)/(x+1)
      98     1 <= x <= 1.25
      99     Peak relative error 2.3e-37

             LN[] holds the numerator and LD[] the denominator coefficients of
             the rational function R.  __ieee754_powl evaluates both by Horner's
             rule in s2 = s * s, lowest-order coefficient first.  The leading
             denominator coefficient 1.0 is implicit and is not stored.  */
     100  static const long double LN[] =
     101  {
     102   -3.0779177200290054398792536829702930623200E1L,
     103    6.5135778082209159921251824580292116201640E1L,
     104   -4.6312921812152436921591152809994014413540E1L,
     105    1.2510208195629420304615674658258363295208E1L,
     106   -9.9266909031921425609179910128531667336670E-1L
     107  };
     108  static const long double LD[] =
     109  {
     110   -5.129862866715009066465422805058933131960E1L,
     111    1.452015077564081884387441590064272782044E2L,
     112   -1.524043275549860505277434040464085593165E2L,
     113    7.236063513651544224319663428634139768808E1L,
     114   -1.494198912340228235853027849917095580053E1L
     115    /* 1.0E0 (implicit leading coefficient) */
     116  };
     117  
     118  /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
     119     0 <= x <= 0.5
     120     Peak relative error 5.7e-38

             PN[] holds the numerator and PD[] the denominator coefficients of
             the rational function R.  __ieee754_powl evaluates both by Horner's
             rule in t = z * z, lowest-order coefficient first.  The leading
             denominator coefficient 1.0 is implicit and is not stored.  */
     121  static const long double PN[] =
     122  {
     123    5.081801691915377692446852383385968225675E8L,
     124    9.360895299872484512023336636427675327355E6L,
     125    4.213701282274196030811629773097579432957E4L,
     126    5.201006511142748908655720086041570288182E1L,
     127    9.088368420359444263703202925095675982530E-3L,
     128  };
     129  static const long double PD[] =
     130  {
     131    3.049081015149226615468111430031590411682E9L,
     132    1.069833887183886839966085436512368982758E8L,
     133    8.259257717868875207333991924545445705394E5L,
     134    1.872583833284143212651746812884298360922E3L,
     135    /* 1.0E0 (implicit leading coefficient) */
     136  };
     137  
     138  static const long double
     139    /* ln 2, followed by its high and low parts.  __ieee754_powl multiplies
               the reduced exponent by lg2_h and lg2_l separately.  */
     140    lg2 = 6.9314718055994530941723212145817656807550E-1L,
     141    lg2_h = 6.9314718055994528622676398299518041312695E-1L,
     142    lg2_l = 2.3190468138462996154948554638754786504121E-17L,
            /* Margin added to p_l in the overflow-boundary comparison of
               __ieee754_powl.  NOTE(review): its derivation is not shown in this
               file -- confirm against the double-precision pow.  */
     143    ovt = 8.0085662595372944372e-0017L,
     144    /* 2/(3*log(2)), followed by its high and low parts */
     145    cp = 9.6179669392597560490661645400126142495110E-1L,
     146    cp_h = 9.6179669392597555432899980587535537779331E-1L,
     147    cp_l = 5.0577616648125906047157785230014751039424E-17L;
     148  
     149  long double
     150  __ieee754_powl (long double x, long double y)
     151  {
            /* Return x raised to the power y.

               x and y are each unpacked into a pair of doubles (ldbl_unpack) and
               classified through the high and low 32-bit words of the leading
               double.  Special operands (y == 0, x == 1, NaN, infinite y,
               y == +-1, 2, 0.5, and x == +-0, +-inf, +-1) return early.
               Otherwise log2(|x|) is built as t1 + t2, multiplied by y to give
               p_h + p_l, and 2**(p_h + p_l) is evaluated and scaled with
               __scalbnl; sgn carries the sign of the result.  */
     152    long double z, ax, z_h, z_l, p_h, p_l;
     153    long double y1, t1, t2, r, s, sgn, t, u, v, w;
     154    long double s2, s_h, s_l, t_h, t_l, ay;
     155    int32_t i, j, k, yisint, n;
     156    uint32_t ix, iy;
     157    int32_t hx, hy, hax;
     158    double ohi, xhi, xlo, yhi, ylo;
     159    uint32_t lx, ly, lj;
     160  
            /* hx/lx: high and low words of the leading double of x;
               ix: hx with the sign bit cleared.  */
     161    ldbl_unpack (x, &xhi, &xlo);
     162    EXTRACT_WORDS (hx, lx, xhi);
     163    ix = hx & 0x7fffffff;
     164  
            /* Same decomposition for y: hy/ly and iy.  */
     165    ldbl_unpack (y, &yhi, &ylo);
     166    EXTRACT_WORDS (hy, ly, yhi);
     167    iy = hy & 0x7fffffff;
     168  
     169    /* y==zero: x**0 = 1, unless x is a signaling NaN */
     170    if ((iy | ly) == 0 && !issignaling (x))
     171      return one;
     172  
     173    /* 1.0**y = 1 (unless y is a signaling NaN); -1.0**+-Inf = 1 */
     174    if (x == one && !issignaling (y))
     175      return one;
     176    if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
     177      return one;
     178  
     179    /* x or y is NaN: return x+y */
     180    if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
     181        || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
     182      return x + y;
     183  
     184    /* determine if y is an odd int when x < 0
     185     * yisint = 0       ... y is not an integer
     186     * yisint = 1       ... y is an odd int
     187     * yisint = 2       ... y is an even int
     188     * (yisint stays 0 whenever x is not negative)
     189     */
     189    yisint = 0;
     190    if (hx < 0)
     191      {
     192        uint32_t low_ye;
     193  
     194        GET_HIGH_WORD (low_ye, ylo);
     195        if ((low_ye & 0x7fffffff) >= 0x43400000)	/* Low part >= 2^53 */
     196  	yisint = 2;		/* even integer y */
     197        else if (iy >= 0x3ff00000)	/* 1.0 */
     198  	{
     199  	  if (floorl (y) == y)
     200  	    {
     201  	      z = 0.5 * y;	/* y/2 is integral exactly when y is even */
     202  	      if (floorl (z) == z)
     203  		yisint = 2;
     204  	      else
     205  		yisint = 1;
     206  	    }
     207  	}
     208      }
     209  
     210    ax = fabsl (x);
     211  
     212    /* special value of y */
     213    if (ly == 0)
     214      {
     215        if (iy == 0x7ff00000)	/* y is +-inf */
     216  	{
     217  	  if (ax > one)
     218  	    /* (|x|>1)**+-inf = inf,0 */
     219  	    return (hy >= 0) ? y : zero;
     220  	  else
     221  	    /* (|x|<1)**-,+inf = inf,0 */
     222  	    return (hy < 0) ? -y : zero;
     223  	}
     224        if (ylo == 0.0)
     225  	{
     226  	  if (iy == 0x3ff00000)
     227  	    {			/* y is  +-1 */
     228  	      if (hy < 0)
     229  		return one / x;
     230  	      else
     231  		return x;
     232  	    }
     233  	  if (hy == 0x40000000)
     234  	    return x * x;		/* y is  2 */
     235  	  if (hy == 0x3fe00000)
     236  	    {			/* y is  0.5 */
     237  	      if (hx >= 0)		/* x >= +0 */
     238  		return sqrtl (x);
     239  	    }
     240  	}
     241      }
     242  
     243    /* special value of x */
     244    if (lx == 0)
     245      {
     246        if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
     247  	{
     248  	  z = ax;		/*x is +-0,+-inf,+-1 */
     249  	  if (hy < 0)
     250  	    z = one / z;	/* z = (1/|x|) */
     251  	  if (hx < 0)
     252  	    {
     253  	      if (((ix - 0x3ff00000) | yisint) == 0)
     254  		{
     255  		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
     256  		}
     257  	      else if (yisint == 1)
     258  		z = -z;		/* (x<0)**odd = -(|x|**odd) */
     259  	    }
     260  	  return z;
     261  	}
     262      }
     263  
     264    /* (x<0)**(non-int) is NaN.  ((uint32_t) hx >> 31) - 1 is zero only
               when the sign bit of x is set.  */
     265    if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
     266      return (x - x) / (x - x);
     267  
     268    /* sgn (sign of result -ve**odd) = -1 else = 1 */
     269    sgn = one;
     270    if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
     271      sgn = -one;			/* (-ve)**(odd int) */
     272  
     273    /* |y| is huge.
     274       2^-16495 = 1/2 of smallest representable value.
     275       If (1 - 1/131072)^y underflows, y > 1.4986e9 */
            /* NOTE(review): iy is the high word of a double here, so the
               2^-16495 figure above looks inherited from a format with a wider
               exponent -- confirm the intended bound.  */
     276    if (iy > 0x41d654b0)
     277      {
     278        /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
     279        if (iy > 0x47d654b0)
     280  	{
     281  	  if (ix <= 0x3fefffff)
     282  	    return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
     283  	  if (ix >= 0x3ff00000)
     284  	    return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
     285  	}
     286        /* over/underflow if x is not close to one */
     287        if (ix < 0x3fefffff)
     288  	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
     289        if (ix > 0x3ff00000)
     290  	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
     291      }
     292  
            /* Replace |y| < 2^-117 by +-2^-117, keeping the sign of y.
               NOTE(review): presumably keeps the split products of y and
               log2(x) below from underflowing -- confirm.  */
     293    ay = y > 0 ? y : -y;
     294    if (ay < 0x1p-117)
     295      y = y < 0 ? -0x1p-117 : 0x1p-117;
     296  
            /* n accumulates the binary exponent of |x|.  */
     297    n = 0;
     298    /* subnormal |x|: scale by 2^113, compensate in n, and reload ix */
     299    if (ix < 0x00100000)
     300      {
     301        ax *= two113;
     302        n -= 113;
     303        ohi = ldbl_high (ax);
     304        GET_HIGH_WORD (ix, ohi);
     305      }
     306    n += ((ix) >> 20) - 0x3ff;
     307    j = ix & 0x000fffff;
     308    /* determine interval: k selects the base point bp[k] */
     309    ix = j | 0x3ff00000;		/* normalize ix */
     310    if (j <= 0x39880)
     311      k = 0;			/* |x|<sqrt(3/2) */
     312    else if (j < 0xbb670)
     313      k = 1;			/* |x|<sqrt(3)   */
     314    else
     315      {
     316        k = 0;
     317        n += 1;
     318        ix -= 0x00100000;
     319      }
     320  
            /* Rescale ax so the exponent of its high word matches the
               normalized ix.  (ix - hax) * 2 drops the sign bit, and the shift
               by 21 leaves the signed difference of the exponent fields.  */
     321    ohi = ldbl_high (ax);
     322    GET_HIGH_WORD (hax, ohi);
     323    ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
     324  
     325    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
     326    u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
     327    v = one / (ax + bp[k]);
     328    s = u * v;
     329    s_h = ldbl_high (s);
     330  
     331    /* t_h=ax+bp[k] High; t_l is the remainder of that sum */
     332    t_h = ax + bp[k];
     333    t_h = ldbl_high (t_h);
     334    t_l = ax - (t_h - bp[k]);
     335    s_l = v * ((u - s_h * t_h) - s_h * t_l);
     336    /* compute log(ax): r = s2^2 * LN(s2) / LD(s2) plus the s_l correction */
     337    s2 = s * s;
     338    u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
     339    v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
     340    r = s2 * s2 * u / v;
     341    r += s_l * (s_h + s);
     342    s2 = s_h * s_h;
     343    t_h = 3.0 + s2 + r;
     344    t_h = ldbl_high (t_h);
     345    t_l = r - ((t_h - 3.0) - s2);
     346    /* u+v = s*(1+...) */
     347    u = s_h * t_h;
     348    v = s_l * t_h + t_l * s;
     349    /* 2/(3log2)*(s+...) */
     350    p_h = u + v;
     351    p_h = ldbl_high (p_h);
     352    p_l = v - (p_h - u);
     353    z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
     354    z_l = cp_l * p_h + p_l * cp + dp_l[k];
     355    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
     356    t = (long double) n;
     357    t1 = (((z_h + z_l) + dp_h[k]) + t);
     358    t1 = ldbl_high (t1);
     359    t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
     360  
     361    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
     362    y1 = ldbl_high (y);
     363    p_l = (y - y1) * t1 + y * t2;
     364    p_h = y1 * t1;
     365    z = p_l + p_h;
     366    ohi = ldbl_high (z);
     367    EXTRACT_WORDS (j, lj, ohi);
            /* NOTE(review): j is the high word of a double, and the 16384 and
               -16495 limits below look inherited from a format with a wider
               exponent.  Results between those limits and the real range seem
               to be left to __scalbnl at the end -- confirm.  */
     368    if (j >= 0x40d00000) /* z >= 16384 */
     369      {
     370        /* if z > 16384 */
     371        if (((j - 0x40d00000) | lj) != 0)
     372  	return sgn * huge * huge;	/* overflow */
     373        else
     374  	{
     375  	  if (p_l + ovt > z - p_h)
     376  	    return sgn * huge * huge;	/* overflow */
     377  	}
     378      }
     379    else if ((j & 0x7fffffff) >= 0x40d01b90)	/* z <= -16495 */
     380      {
     381        /* z < -16495 */
     382        if (((j - 0xc0d01bc0) | lj) != 0)
     383  	return sgn * tiny * tiny;	/* underflow */
     384        else
     385  	{
     386  	  if (p_l <= z - p_h)
     387  	    return sgn * tiny * tiny;	/* underflow */
     388  	}
     389      }
     390    /* compute 2**(p_h+p_l) */
     391    i = j & 0x7fffffff;
     392    k = (i >> 20) - 0x3ff;	/* k is not read again in this function */
     393    n = 0;
     394    if (i > 0x3fe00000)
     395      {				/* if |z| > 0.5, set n = [z+0.5] */
     396        n = floorl (z + 0.5L);
     397        t = n;
     398        p_h -= t;
     399      }
            /* Convert the remaining fraction from base 2 to base e:
               z + w = (p_h + p_l) * ln 2, with w the rounding error of z.  */
     400    t = p_l + p_h;
     401    t = ldbl_high (t);
     402    u = t * lg2_h;
     403    v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
     404    z = u + v;
     405    w = v - (z - u);
     406    /*  exp(z), using the PN/PD rational approximation in t = z*z */
     407    t = z * z;
     408    u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
     409    v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
     410    t1 = z - t * u / v;
     411    r = (z * t1) / (t1 - two) - (w + z * w);
     412    z = one - (r - z);
            /* Apply the sign and the integer part n of the exponent.  */
     413    z = __scalbnl (sgn * z, n);
            /* NOTE(review): presumably raises the underflow exception when
               the result is tiny; macro from <math-underflow.h> -- confirm.  */
     414    math_check_force_underflow (z);
     415    return z;
     416  }
          /* NOTE(review): presumably publishes __ieee754_powl under the __powl
             finite-math alias via <libm-alias-finite.h> -- confirm.  */
     417  libm_alias_finite (__ieee754_powl, __powl)