1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /* Expansions and modifications for 128-bit long double are
      13     Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      14     and are incorporated herein by permission of the author.  The author
      15     reserves the right to distribute this material elsewhere under different
      16     copying permissions.  These modifications are distributed here under
      17     the following terms:
      18  
      19      This library is free software; you can redistribute it and/or
      20      modify it under the terms of the GNU Lesser General Public
      21      License as published by the Free Software Foundation; either
      22      version 2.1 of the License, or (at your option) any later version.
      23  
      24      This library is distributed in the hope that it will be useful,
      25      but WITHOUT ANY WARRANTY; without even the implied warranty of
      26      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      27      Lesser General Public License for more details.
      28  
      29      You should have received a copy of the GNU Lesser General Public
      30      License along with this library; if not, see
      31      <https://www.gnu.org/licenses/>.  */
      32  
      33  /* __ieee754_powl(x,y) return x**y
      34   *
      35   *		      n
      36   * Method:  Let x =  2   * (1+f)
      37   *	1. Compute and return log2(x) in two pieces:
      38   *		log2(x) = w1 + w2,
      39   *	   where w1 has 113-53 = 60 bit trailing zeros.
      40   *	2. Perform y*log2(x) = n+y' by simulating muti-precision
      41   *	   arithmetic, where |y'|<=0.5.
      42   *	3. Return x**y = 2**n*exp(y'*log2)
      43   *
      44   * Special cases:
      45   *	1.  (anything) ** 0  is 1
      46   *	2.  (anything) ** 1  is itself
      47   *	3.  (anything) ** NAN is NAN
      48   *	4.  NAN ** (anything except 0) is NAN
      49   *	5.  +-(|x| > 1) **  +INF is +INF
      50   *	6.  +-(|x| > 1) **  -INF is +0
      51   *	7.  +-(|x| < 1) **  +INF is +0
      52   *	8.  +-(|x| < 1) **  -INF is +INF
      53   *	9.  +-1         ** +-INF is NAN
      54   *	10. +0 ** (+anything except 0, NAN)               is +0
      55   *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
      56   *	12. +0 ** (-anything except 0, NAN)               is +INF
      57   *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
      58   *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
      59   *	15. +INF ** (+anything except 0,NAN) is +INF
      60   *	16. +INF ** (-anything except 0,NAN) is +0
      61   *	17. -INF ** (anything)  = -0 ** (-anything)
      62   *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
      63   *	19. (-anything except 0 and inf) ** (non-integer) is NAN
      64   *
      65   */
      66  
      67  #include <math.h>
      68  #include <math-barriers.h>
      69  #include <math_private.h>
      70  #include <libm-alias-finite.h>
      71  
      72  static const _Float128 bp[] = {
      73    1,
      74    L(1.5),
      75  };
      76  
      77  /* log_2(1.5) */
      78  static const _Float128 dp_h[] = {
      79    0.0,
      80    L(5.8496250072115607565592654282227158546448E-1)
      81  };
      82  
      83  /* Low part of log_2(1.5) */
      84  static const _Float128 dp_l[] = {
      85    0.0,
      86    L(1.0579781240112554492329533686862998106046E-16)
      87  };
      88  
      89  static const _Float128 zero = 0,
      90    one = 1,
      91    two = 2,
      92    two113 = L(1.0384593717069655257060992658440192E34),
      93    huge = L(1.0e3000),
      94    tiny = L(1.0e-3000);
      95  
      96  /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
      97     z = (x-1)/(x+1)
      98     1 <= x <= 1.25
      99     Peak relative error 2.3e-37 */
     100  static const _Float128 LN[] =
     101  {
     102   L(-3.0779177200290054398792536829702930623200E1),
     103    L(6.5135778082209159921251824580292116201640E1),
     104   L(-4.6312921812152436921591152809994014413540E1),
     105    L(1.2510208195629420304615674658258363295208E1),
     106   L(-9.9266909031921425609179910128531667336670E-1)
     107  };
     108  static const _Float128 LD[] =
     109  {
     110   L(-5.129862866715009066465422805058933131960E1),
     111    L(1.452015077564081884387441590064272782044E2),
     112   L(-1.524043275549860505277434040464085593165E2),
     113    L(7.236063513651544224319663428634139768808E1),
     114   L(-1.494198912340228235853027849917095580053E1)
     115    /* 1.0E0 */
     116  };
     117  
     118  /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
     119     0 <= x <= 0.5
     120     Peak relative error 5.7e-38  */
     121  static const _Float128 PN[] =
     122  {
     123    L(5.081801691915377692446852383385968225675E8),
     124    L(9.360895299872484512023336636427675327355E6),
     125    L(4.213701282274196030811629773097579432957E4),
     126    L(5.201006511142748908655720086041570288182E1),
     127    L(9.088368420359444263703202925095675982530E-3),
     128  };
     129  static const _Float128 PD[] =
     130  {
     131    L(3.049081015149226615468111430031590411682E9),
     132    L(1.069833887183886839966085436512368982758E8),
     133    L(8.259257717868875207333991924545445705394E5),
     134    L(1.872583833284143212651746812884298360922E3),
     135    /* 1.0E0 */
     136  };
     137  
     138  static const _Float128
     139    /* ln 2 */
     140    lg2 = L(6.9314718055994530941723212145817656807550E-1),
     141    lg2_h = L(6.9314718055994528622676398299518041312695E-1),
     142    lg2_l = L(2.3190468138462996154948554638754786504121E-17),
     143    ovt = L(8.0085662595372944372e-0017),
     144    /* 2/(3*log(2)) */
     145    cp = L(9.6179669392597560490661645400126142495110E-1),
     146    cp_h = L(9.6179669392597555432899980587535537779331E-1),
     147    cp_l = L(5.0577616648125906047157785230014751039424E-17);
     148  
     149  _Float128
     150  __ieee754_powl (_Float128 x, _Float128 y)
     151  {
     152    _Float128 z, ax, z_h, z_l, p_h, p_l;
     153    _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
     154    _Float128 s2, s_h, s_l, t_h, t_l, ay;
     155    int32_t i, j, k, yisint, n;
     156    uint32_t ix, iy;
     157    int32_t hx, hy;
     158    ieee854_long_double_shape_type o, p, q;
     159  
     160    p.value = x;
     161    hx = p.parts32.w0;
     162    ix = hx & 0x7fffffff;
     163  
     164    q.value = y;
     165    hy = q.parts32.w0;
     166    iy = hy & 0x7fffffff;
     167  
     168  
     169    /* y==zero: x**0 = 1 */
     170    if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
     171        && !issignaling (x))
     172      return one;
     173  
     174    /* 1.0**y = 1; -1.0**+-Inf = 1 */
     175    if (x == one && !issignaling (y))
     176      return one;
     177    if (x == -1 && iy == 0x7fff0000
     178        && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
     179      return one;
     180  
     181    /* +-NaN return x+y */
     182    if ((ix > 0x7fff0000)
     183        || ((ix == 0x7fff0000)
     184  	  && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
     185        || (iy > 0x7fff0000)
     186        || ((iy == 0x7fff0000)
     187  	  && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
     188      return x + y;
     189  
     190    /* determine if y is an odd int when x < 0
     191     * yisint = 0       ... y is not an integer
     192     * yisint = 1       ... y is an odd int
     193     * yisint = 2       ... y is an even int
     194     */
     195    yisint = 0;
     196    if (hx < 0)
     197      {
     198        if (iy >= 0x40700000)	/* 2^113 */
     199  	yisint = 2;		/* even integer y */
     200        else if (iy >= 0x3fff0000)	/* 1.0 */
     201  	{
     202  	  if (floorl (y) == y)
     203  	    {
     204  	      z = 0.5 * y;
     205  	      if (floorl (z) == z)
     206  		yisint = 2;
     207  	      else
     208  		yisint = 1;
     209  	    }
     210  	}
     211      }
     212  
     213    /* special value of y */
     214    if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
     215      {
     216        if (iy == 0x7fff0000)	/* y is +-inf */
     217  	{
     218  	  if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
     219  	      == 0)
     220  	    return y - y;	/* +-1**inf is NaN */
     221  	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
     222  	    return (hy >= 0) ? y : zero;
     223  	  else			/* (|x|<1)**-,+inf = inf,0 */
     224  	    return (hy < 0) ? -y : zero;
     225  	}
     226        if (iy == 0x3fff0000)
     227  	{			/* y is  +-1 */
     228  	  if (hy < 0)
     229  	    return one / x;
     230  	  else
     231  	    return x;
     232  	}
     233        if (hy == 0x40000000)
     234  	return x * x;		/* y is  2 */
     235        if (hy == 0x3ffe0000)
     236  	{			/* y is  0.5 */
     237  	  if (hx >= 0)		/* x >= +0 */
     238  	    return sqrtl (x);
     239  	}
     240      }
     241  
     242    ax = fabsl (x);
     243    /* special value of x */
     244    if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
     245      {
     246        if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
     247  	{
     248  	  z = ax;		/*x is +-0,+-inf,+-1 */
     249  	  if (hy < 0)
     250  	    z = one / z;	/* z = (1/|x|) */
     251  	  if (hx < 0)
     252  	    {
     253  	      if (((ix - 0x3fff0000) | yisint) == 0)
     254  		{
     255  		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
     256  		}
     257  	      else if (yisint == 1)
     258  		z = -z;		/* (x<0)**odd = -(|x|**odd) */
     259  	    }
     260  	  return z;
     261  	}
     262      }
     263  
     264    /* (x<0)**(non-int) is NaN */
     265    if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
     266      return (x - x) / (x - x);
     267  
     268    /* sgn (sign of result -ve**odd) = -1 else = 1 */
     269    sgn = one;
     270    if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
     271      sgn = -one;			/* (-ve)**(odd int) */
     272  
     273    /* |y| is huge.
     274       2^-16495 = 1/2 of smallest representable value.
     275       If (1 - 1/131072)^y underflows, y > 1.4986e9 */
     276    if (iy > 0x401d654b)
     277      {
     278        /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
     279        if (iy > 0x407d654b)
     280  	{
     281  	  if (ix <= 0x3ffeffff)
     282  	    return (hy < 0) ? huge * huge : tiny * tiny;
     283  	  if (ix >= 0x3fff0000)
     284  	    return (hy > 0) ? huge * huge : tiny * tiny;
     285  	}
     286        /* over/underflow if x is not close to one */
     287        if (ix < 0x3ffeffff)
     288  	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
     289        if (ix > 0x3fff0000)
     290  	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
     291      }
     292  
     293    ay = y > 0 ? y : -y;
     294    if (ay < 0x1p-128)
     295      y = y < 0 ? -0x1p-128 : 0x1p-128;
     296  
     297    n = 0;
     298    /* take care subnormal number */
     299    if (ix < 0x00010000)
     300      {
     301        ax *= two113;
     302        n -= 113;
     303        o.value = ax;
     304        ix = o.parts32.w0;
     305      }
     306    n += ((ix) >> 16) - 0x3fff;
     307    j = ix & 0x0000ffff;
     308    /* determine interval */
     309    ix = j | 0x3fff0000;		/* normalize ix */
     310    if (j <= 0x3988)
     311      k = 0;			/* |x|<sqrt(3/2) */
     312    else if (j < 0xbb67)
     313      k = 1;			/* |x|<sqrt(3)   */
     314    else
     315      {
     316        k = 0;
     317        n += 1;
     318        ix -= 0x00010000;
     319      }
     320  
     321    o.value = ax;
     322    o.parts32.w0 = ix;
     323    ax = o.value;
     324  
     325    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
     326    u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
     327    v = one / (ax + bp[k]);
     328    s = u * v;
     329    s_h = s;
     330  
     331    o.value = s_h;
     332    o.parts32.w3 = 0;
     333    o.parts32.w2 &= 0xf8000000;
     334    s_h = o.value;
     335    /* t_h=ax+bp[k] High */
     336    t_h = ax + bp[k];
     337    o.value = t_h;
     338    o.parts32.w3 = 0;
     339    o.parts32.w2 &= 0xf8000000;
     340    t_h = o.value;
     341    t_l = ax - (t_h - bp[k]);
     342    s_l = v * ((u - s_h * t_h) - s_h * t_l);
     343    /* compute log(ax) */
     344    s2 = s * s;
     345    u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
     346    v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
     347    r = s2 * s2 * u / v;
     348    r += s_l * (s_h + s);
     349    s2 = s_h * s_h;
     350    t_h = 3.0 + s2 + r;
     351    o.value = t_h;
     352    o.parts32.w3 = 0;
     353    o.parts32.w2 &= 0xf8000000;
     354    t_h = o.value;
     355    t_l = r - ((t_h - 3.0) - s2);
     356    /* u+v = s*(1+...) */
     357    u = s_h * t_h;
     358    v = s_l * t_h + t_l * s;
     359    /* 2/(3log2)*(s+...) */
     360    p_h = u + v;
     361    o.value = p_h;
     362    o.parts32.w3 = 0;
     363    o.parts32.w2 &= 0xf8000000;
     364    p_h = o.value;
     365    p_l = v - (p_h - u);
     366    z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
     367    z_l = cp_l * p_h + p_l * cp + dp_l[k];
     368    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
     369    t = (_Float128) n;
     370    t1 = (((z_h + z_l) + dp_h[k]) + t);
     371    o.value = t1;
     372    o.parts32.w3 = 0;
     373    o.parts32.w2 &= 0xf8000000;
     374    t1 = o.value;
     375    t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
     376  
     377    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
     378    y1 = y;
     379    o.value = y1;
     380    o.parts32.w3 = 0;
     381    o.parts32.w2 &= 0xf8000000;
     382    y1 = o.value;
     383    p_l = (y - y1) * t1 + y * t2;
     384    p_h = y1 * t1;
     385    z = p_l + p_h;
     386    o.value = z;
     387    j = o.parts32.w0;
     388    if (j >= 0x400d0000) /* z >= 16384 */
     389      {
     390        /* if z > 16384 */
     391        if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
     392  	return sgn * huge * huge;	/* overflow */
     393        else
     394  	{
     395  	  if (p_l + ovt > z - p_h)
     396  	    return sgn * huge * huge;	/* overflow */
     397  	}
     398      }
     399    else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
     400      {
     401        /* z < -16495 */
     402        if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
     403  	  != 0)
     404  	return sgn * tiny * tiny;	/* underflow */
     405        else
     406  	{
     407  	  if (p_l <= z - p_h)
     408  	    return sgn * tiny * tiny;	/* underflow */
     409  	}
     410      }
     411    /* compute 2**(p_h+p_l) */
     412    i = j & 0x7fffffff;
     413    k = (i >> 16) - 0x3fff;
     414    n = 0;
     415    if (i > 0x3ffe0000)
     416      {				/* if |z| > 0.5, set n = [z+0.5] */
     417        n = floorl (z + L(0.5));
     418        t = n;
     419        p_h -= t;
     420      }
     421    t = p_l + p_h;
     422    o.value = t;
     423    o.parts32.w3 = 0;
     424    o.parts32.w2 &= 0xf8000000;
     425    t = o.value;
     426    u = t * lg2_h;
     427    v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
     428    z = u + v;
     429    w = v - (z - u);
     430    /*  exp(z) */
     431    t = z * z;
     432    u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
     433    v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
     434    t1 = z - t * u / v;
     435    r = (z * t1) / (t1 - two) - (w + z * w);
     436    z = one - (r - z);
     437    o.value = z;
     438    j = o.parts32.w0;
     439    j += (n << 16);
     440    if ((j >> 16) <= 0)
     441      {
     442        z = __scalbnl (z, n);	/* subnormal output */
     443        _Float128 force_underflow = z * z;
     444        math_force_eval (force_underflow);
     445      }
     446    else
     447      {
     448        o.parts32.w0 = j;
     449        z = o.value;
     450      }
     451    return sgn * z;
     452  }
     453  libm_alias_finite (__ieee754_powl, __powl)