1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /* Expansions and modifications for 128-bit long double are
      13     Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      14     and are incorporated herein by permission of the author.  The author
      15     reserves the right to distribute this material elsewhere under different
      16     copying permissions.  These modifications are distributed here under
      17     the following terms:
      18  
      19      This library is free software; you can redistribute it and/or
      20      modify it under the terms of the GNU Lesser General Public
      21      License as published by the Free Software Foundation; either
      22      version 2.1 of the License, or (at your option) any later version.
      23  
      24      This library is distributed in the hope that it will be useful,
      25      but WITHOUT ANY WARRANTY; without even the implied warranty of
      26      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      27      Lesser General Public License for more details.
      28  
      29      You should have received a copy of the GNU Lesser General Public
      30      License along with this library; if not, see
      31      <http://www.gnu.org/licenses/>.  */
      32  
      33  /* powq(x,y) return x**y
      34   *
      35   *		      n
      36   * Method:  Let x =  2   * (1+f)
      37   *	1. Compute and return log2(x) in two pieces:
      38   *		log2(x) = w1 + w2,
      39   *	   where w1 has 113-53 = 60 bit trailing zeros.
 *	2. Perform y*log2(x) = n+y' by simulating multi-precision
      41   *	   arithmetic, where |y'|<=0.5.
      42   *	3. Return x**y = 2**n*exp(y'*log2)
      43   *
      44   * Special cases:
      45   *	1.  (anything) ** 0  is 1
      46   *	2.  (anything) ** 1  is itself
 *	3.  (anything except 1) ** NAN is NAN; 1 ** NAN is 1
      48   *	4.  NAN ** (anything except 0) is NAN
      49   *	5.  +-(|x| > 1) **  +INF is +INF
      50   *	6.  +-(|x| > 1) **  -INF is +0
      51   *	7.  +-(|x| < 1) **  +INF is +0
      52   *	8.  +-(|x| < 1) **  -INF is +INF
 *	9.  +-1         ** +-INF is 1
      54   *	10. +0 ** (+anything except 0, NAN)               is +0
      55   *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
      56   *	12. +0 ** (-anything except 0, NAN)               is +INF
      57   *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
      58   *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
      59   *	15. +INF ** (+anything except 0,NAN) is +INF
      60   *	16. +INF ** (-anything except 0,NAN) is +0
      61   *	17. -INF ** (anything)  = -0 ** (-anything)
      62   *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
      63   *	19. (-anything except 0 and inf) ** (non-integer) is NAN
      64   *
      65   */
      66  
      67  #include "quadmath-imp.h"
      68  
      69  static const __float128 bp[] = {
      70    1,
      71    1.5Q,
      72  };
      73  
      74  /* log_2(1.5) */
      75  static const __float128 dp_h[] = {
      76    0.0,
      77    5.8496250072115607565592654282227158546448E-1Q
      78  };
      79  
      80  /* Low part of log_2(1.5) */
      81  static const __float128 dp_l[] = {
      82    0.0,
      83    1.0579781240112554492329533686862998106046E-16Q
      84  };
      85  
      86  static const __float128 zero = 0,
      87    one = 1,
      88    two = 2,
      89    two113 = 1.0384593717069655257060992658440192E34Q,
      90    huge = 1.0e3000Q,
      91    tiny = 1.0e-3000Q;
      92  
      93  /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
      94     z = (x-1)/(x+1)
      95     1 <= x <= 1.25
      96     Peak relative error 2.3e-37 */
      97  static const __float128 LN[] =
      98  {
      99   -3.0779177200290054398792536829702930623200E1Q,
     100    6.5135778082209159921251824580292116201640E1Q,
     101   -4.6312921812152436921591152809994014413540E1Q,
     102    1.2510208195629420304615674658258363295208E1Q,
     103   -9.9266909031921425609179910128531667336670E-1Q
     104  };
     105  static const __float128 LD[] =
     106  {
     107   -5.129862866715009066465422805058933131960E1Q,
     108    1.452015077564081884387441590064272782044E2Q,
     109   -1.524043275549860505277434040464085593165E2Q,
     110    7.236063513651544224319663428634139768808E1Q,
     111   -1.494198912340228235853027849917095580053E1Q
     112    /* 1.0E0 */
     113  };
     114  
     115  /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
     116     0 <= x <= 0.5
     117     Peak relative error 5.7e-38  */
     118  static const __float128 PN[] =
     119  {
     120    5.081801691915377692446852383385968225675E8Q,
     121    9.360895299872484512023336636427675327355E6Q,
     122    4.213701282274196030811629773097579432957E4Q,
     123    5.201006511142748908655720086041570288182E1Q,
     124    9.088368420359444263703202925095675982530E-3Q,
     125  };
     126  static const __float128 PD[] =
     127  {
     128    3.049081015149226615468111430031590411682E9Q,
     129    1.069833887183886839966085436512368982758E8Q,
     130    8.259257717868875207333991924545445705394E5Q,
     131    1.872583833284143212651746812884298360922E3Q,
     132    /* 1.0E0 */
     133  };
     134  
     135  static const __float128
     136    /* ln 2 */
     137    lg2 = 6.9314718055994530941723212145817656807550E-1Q,
     138    lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
     139    lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
     140    ovt = 8.0085662595372944372e-0017Q,
     141    /* 2/(3*log(2)) */
     142    cp = 9.6179669392597560490661645400126142495110E-1Q,
     143    cp_h = 9.6179669392597555432899980587535537779331E-1Q,
     144    cp_l = 5.0577616648125906047157785230014751039424E-17Q;
     145  
     146  __float128
     147  powq (__float128 x, __float128 y)
     148  {
     149    __float128 z, ax, z_h, z_l, p_h, p_l;
     150    __float128 y1, t1, t2, r, s, sgn, t, u, v, w;
     151    __float128 s2, s_h, s_l, t_h, t_l, ay;
     152    int32_t i, j, k, yisint, n;
     153    uint32_t ix, iy;
     154    int32_t hx, hy;
     155    ieee854_float128 o, p, q;
     156  
     157    p.value = x;
     158    hx = p.words32.w0;
     159    ix = hx & 0x7fffffff;
     160  
     161    q.value = y;
     162    hy = q.words32.w0;
     163    iy = hy & 0x7fffffff;
     164  
     165  
     166    /* y==zero: x**0 = 1 */
     167    if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0
     168        && !issignalingq (x))
     169      return one;
     170  
     171    /* 1.0**y = 1; -1.0**+-Inf = 1 */
     172    if (x == one && !issignalingq (y))
     173      return one;
     174    if (x == -1 && iy == 0x7fff0000
     175        && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
     176      return one;
     177  
     178    /* +-NaN return x+y */
     179    if ((ix > 0x7fff0000)
     180        || ((ix == 0x7fff0000)
     181  	  && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
     182        || (iy > 0x7fff0000)
     183        || ((iy == 0x7fff0000)
     184  	  && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
     185      return x + y;
     186  
     187    /* determine if y is an odd int when x < 0
     188     * yisint = 0       ... y is not an integer
     189     * yisint = 1       ... y is an odd int
     190     * yisint = 2       ... y is an even int
     191     */
     192    yisint = 0;
     193    if (hx < 0)
     194      {
     195        if (iy >= 0x40700000)	/* 2^113 */
     196  	yisint = 2;		/* even integer y */
     197        else if (iy >= 0x3fff0000)	/* 1.0 */
     198  	{
     199  	  if (floorq (y) == y)
     200  	    {
     201  	      z = 0.5 * y;
     202  	      if (floorq (z) == z)
     203  		yisint = 2;
     204  	      else
     205  		yisint = 1;
     206  	    }
     207  	}
     208      }
     209  
     210    /* special value of y */
     211    if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
     212      {
     213        if (iy == 0x7fff0000)	/* y is +-inf */
     214  	{
     215  	  if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
     216  	      == 0)
     217  	    return y - y;	/* +-1**inf is NaN */
     218  	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
     219  	    return (hy >= 0) ? y : zero;
     220  	  else			/* (|x|<1)**-,+inf = inf,0 */
     221  	    return (hy < 0) ? -y : zero;
     222  	}
     223        if (iy == 0x3fff0000)
     224  	{			/* y is  +-1 */
     225  	  if (hy < 0)
     226  	    return one / x;
     227  	  else
     228  	    return x;
     229  	}
     230        if (hy == 0x40000000)
     231  	return x * x;		/* y is  2 */
     232        if (hy == 0x3ffe0000)
     233  	{			/* y is  0.5 */
     234  	  if (hx >= 0)		/* x >= +0 */
     235  	    return sqrtq (x);
     236  	}
     237      }
     238  
     239    ax = fabsq (x);
     240    /* special value of x */
     241    if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
     242      {
     243        if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
     244  	{
     245  	  z = ax;		/*x is +-0,+-inf,+-1 */
     246  	  if (hy < 0)
     247  	    z = one / z;	/* z = (1/|x|) */
     248  	  if (hx < 0)
     249  	    {
     250  	      if (((ix - 0x3fff0000) | yisint) == 0)
     251  		{
     252  		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
     253  		}
     254  	      else if (yisint == 1)
     255  		z = -z;		/* (x<0)**odd = -(|x|**odd) */
     256  	    }
     257  	  return z;
     258  	}
     259      }
     260  
     261    /* (x<0)**(non-int) is NaN */
     262    if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
     263      return (x - x) / (x - x);
     264  
     265    /* sgn (sign of result -ve**odd) = -1 else = 1 */
     266    sgn = one;
     267    if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
     268      sgn = -one;			/* (-ve)**(odd int) */
     269  
     270    /* |y| is huge.
     271       2^-16495 = 1/2 of smallest representable value.
     272       If (1 - 1/131072)^y underflows, y > 1.4986e9 */
     273    if (iy > 0x401d654b)
     274      {
     275        /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
     276        if (iy > 0x407d654b)
     277  	{
     278  	  if (ix <= 0x3ffeffff)
     279  	    return (hy < 0) ? huge * huge : tiny * tiny;
     280  	  if (ix >= 0x3fff0000)
     281  	    return (hy > 0) ? huge * huge : tiny * tiny;
     282  	}
     283        /* over/underflow if x is not close to one */
     284        if (ix < 0x3ffeffff)
     285  	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
     286        if (ix > 0x3fff0000)
     287  	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
     288      }
     289  
     290    ay = y > 0 ? y : -y;
     291    if (ay < 0x1p-128)
     292      y = y < 0 ? -0x1p-128 : 0x1p-128;
     293  
     294    n = 0;
     295    /* take care subnormal number */
     296    if (ix < 0x00010000)
     297      {
     298        ax *= two113;
     299        n -= 113;
     300        o.value = ax;
     301        ix = o.words32.w0;
     302      }
     303    n += ((ix) >> 16) - 0x3fff;
     304    j = ix & 0x0000ffff;
     305    /* determine interval */
     306    ix = j | 0x3fff0000;		/* normalize ix */
     307    if (j <= 0x3988)
     308      k = 0;			/* |x|<sqrt(3/2) */
     309    else if (j < 0xbb67)
     310      k = 1;			/* |x|<sqrt(3)   */
     311    else
     312      {
     313        k = 0;
     314        n += 1;
     315        ix -= 0x00010000;
     316      }
     317  
     318    o.value = ax;
     319    o.words32.w0 = ix;
     320    ax = o.value;
     321  
     322    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
     323    u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
     324    v = one / (ax + bp[k]);
     325    s = u * v;
     326    s_h = s;
     327  
     328    o.value = s_h;
     329    o.words32.w3 = 0;
     330    o.words32.w2 &= 0xf8000000;
     331    s_h = o.value;
     332    /* t_h=ax+bp[k] High */
     333    t_h = ax + bp[k];
     334    o.value = t_h;
     335    o.words32.w3 = 0;
     336    o.words32.w2 &= 0xf8000000;
     337    t_h = o.value;
     338    t_l = ax - (t_h - bp[k]);
     339    s_l = v * ((u - s_h * t_h) - s_h * t_l);
     340    /* compute log(ax) */
     341    s2 = s * s;
     342    u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
     343    v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
     344    r = s2 * s2 * u / v;
     345    r += s_l * (s_h + s);
     346    s2 = s_h * s_h;
     347    t_h = 3.0 + s2 + r;
     348    o.value = t_h;
     349    o.words32.w3 = 0;
     350    o.words32.w2 &= 0xf8000000;
     351    t_h = o.value;
     352    t_l = r - ((t_h - 3.0) - s2);
     353    /* u+v = s*(1+...) */
     354    u = s_h * t_h;
     355    v = s_l * t_h + t_l * s;
     356    /* 2/(3log2)*(s+...) */
     357    p_h = u + v;
     358    o.value = p_h;
     359    o.words32.w3 = 0;
     360    o.words32.w2 &= 0xf8000000;
     361    p_h = o.value;
     362    p_l = v - (p_h - u);
     363    z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
     364    z_l = cp_l * p_h + p_l * cp + dp_l[k];
     365    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
     366    t = (__float128) n;
     367    t1 = (((z_h + z_l) + dp_h[k]) + t);
     368    o.value = t1;
     369    o.words32.w3 = 0;
     370    o.words32.w2 &= 0xf8000000;
     371    t1 = o.value;
     372    t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
     373  
     374    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
     375    y1 = y;
     376    o.value = y1;
     377    o.words32.w3 = 0;
     378    o.words32.w2 &= 0xf8000000;
     379    y1 = o.value;
     380    p_l = (y - y1) * t1 + y * t2;
     381    p_h = y1 * t1;
     382    z = p_l + p_h;
     383    o.value = z;
     384    j = o.words32.w0;
     385    if (j >= 0x400d0000) /* z >= 16384 */
     386      {
     387        /* if z > 16384 */
     388        if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
     389  	return sgn * huge * huge;	/* overflow */
     390        else
     391  	{
     392  	  if (p_l + ovt > z - p_h)
     393  	    return sgn * huge * huge;	/* overflow */
     394  	}
     395      }
     396    else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
     397      {
     398        /* z < -16495 */
     399        if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
     400  	  != 0)
     401  	return sgn * tiny * tiny;	/* underflow */
     402        else
     403  	{
     404  	  if (p_l <= z - p_h)
     405  	    return sgn * tiny * tiny;	/* underflow */
     406  	}
     407      }
     408    /* compute 2**(p_h+p_l) */
     409    i = j & 0x7fffffff;
     410    k = (i >> 16) - 0x3fff;
     411    n = 0;
     412    if (i > 0x3ffe0000)
     413      {				/* if |z| > 0.5, set n = [z+0.5] */
     414        n = floorq (z + 0.5Q);
     415        t = n;
     416        p_h -= t;
     417      }
     418    t = p_l + p_h;
     419    o.value = t;
     420    o.words32.w3 = 0;
     421    o.words32.w2 &= 0xf8000000;
     422    t = o.value;
     423    u = t * lg2_h;
     424    v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
     425    z = u + v;
     426    w = v - (z - u);
     427    /*  exp(z) */
     428    t = z * z;
     429    u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
     430    v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
     431    t1 = z - t * u / v;
     432    r = (z * t1) / (t1 - two) - (w + z * w);
     433    z = one - (r - z);
     434    o.value = z;
     435    j = o.words32.w0;
     436    j += (n << 16);
     437    if ((j >> 16) <= 0)
     438      {
     439        z = scalbnq (z, n);	/* subnormal output */
     440        __float128 force_underflow = z * z;
     441        math_force_eval (force_underflow);
     442      }
     443    else
     444      {
     445        o.words32.w0 = j;
     446        z = o.value;
     447      }
     448    return sgn * z;
     449  }