1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /*
      13     Long double expansions are
      14     Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      15     and are incorporated herein by permission of the author.  The author
      16     reserves the right to distribute this material elsewhere under different
      17     copying permissions.  These modifications are distributed here under
      18     the following terms:
      19  
      20      This library is free software; you can redistribute it and/or
      21      modify it under the terms of the GNU Lesser General Public
      22      License as published by the Free Software Foundation; either
      23      version 2.1 of the License, or (at your option) any later version.
      24  
      25      This library is distributed in the hope that it will be useful,
      26      but WITHOUT ANY WARRANTY; without even the implied warranty of
      27      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      28      Lesser General Public License for more details.
      29  
      30      You should have received a copy of the GNU Lesser General Public
      31      License along with this library; if not, see
      32      <http://www.gnu.org/licenses/>.  */
      33  
      34  /* acosq(x)
      35   * Method :
      36   *      acos(x)  = pi/2 - asin(x)
      37   *      acos(-x) = pi/2 + asin(x)
      38   * For |x| <= 0.375
      39   *      acos(x) = pi/2 - asin(x)
      40   * Between .375 and .5 the approximation is
      41   *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
      42   * Between .5 and .625 the approximation is
      43   *      acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
      44   * For x > 0.625,
      45   *      acos(x) = 2 asin(sqrt((1-x)/2))
      46   *      computed with an extended precision square root in the leading term.
      47   * For x < -0.625
      48   *      acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
      49   *
      50   * Special cases:
      51   *      if x is NaN, return x itself;
      52   *      if |x|>1, return NaN with invalid signal.
      53   *
      54   * Functions needed: sqrtq.
      55   */
      56  
      57  #include "quadmath-imp.h"
      58  
      59  static const __float128
      60    one = 1,
      61    pio2_hi = 1.5707963267948966192313216916397514420986Q,
      62    pio2_lo = 4.3359050650618905123985220130216759843812E-35Q,
      63  
      64    /* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
      65       -0.0625 <= x <= 0.0625
      66       peak relative error 3.3e-35  */
      67  
      68    rS0 =  5.619049346208901520945464704848780243887E0Q,
      69    rS1 = -4.460504162777731472539175700169871920352E1Q,
      70    rS2 =  1.317669505315409261479577040530751477488E2Q,
      71    rS3 = -1.626532582423661989632442410808596009227E2Q,
      72    rS4 =  3.144806644195158614904369445440583873264E1Q,
      73    rS5 =  9.806674443470740708765165604769099559553E1Q,
      74    rS6 = -5.708468492052010816555762842394927806920E1Q,
      75    rS7 = -1.396540499232262112248553357962639431922E1Q,
      76    rS8 =  1.126243289311910363001762058295832610344E1Q,
      77    rS9 =  4.956179821329901954211277873774472383512E-1Q,
      78    rS10 = -3.313227657082367169241333738391762525780E-1Q,
      79  
      80    sS0 = -4.645814742084009935700221277307007679325E0Q,
      81    sS1 =  3.879074822457694323970438316317961918430E1Q,
      82    sS2 = -1.221986588013474694623973554726201001066E2Q,
      83    sS3 =  1.658821150347718105012079876756201905822E2Q,
      84    sS4 = -4.804379630977558197953176474426239748977E1Q,
      85    sS5 = -1.004296417397316948114344573811562952793E2Q,
      86    sS6 =  7.530281592861320234941101403870010111138E1Q,
      87    sS7 =  1.270735595411673647119592092304357226607E1Q,
      88    sS8 = -1.815144839646376500705105967064792930282E1Q,
      89    sS9 = -7.821597334910963922204235247786840828217E-2Q,
      90    /* 1.000000000000000000000000000000000000000E0 */
      91  
      92    acosr5625 = 9.7338991014954640492751132535550279812151E-1Q,
      93    pimacosr5625 = 2.1682027434402468335351320579240000860757E0Q,
      94  
      95    /* acos(0.4375 + x) = acos(0.4375) + x rS(x) / sS(x)
      96       -0.0625 <= x <= 0.0625
      97       peak relative error 2.1e-35  */
      98  
      99    P0 =  2.177690192235413635229046633751390484892E0Q,
     100    P1 = -2.848698225706605746657192566166142909573E1Q,
     101    P2 =  1.040076477655245590871244795403659880304E2Q,
     102    P3 = -1.400087608918906358323551402881238180553E2Q,
     103    P4 =  2.221047917671449176051896400503615543757E1Q,
     104    P5 =  9.643714856395587663736110523917499638702E1Q,
     105    P6 = -5.158406639829833829027457284942389079196E1Q,
     106    P7 = -1.578651828337585944715290382181219741813E1Q,
     107    P8 =  1.093632715903802870546857764647931045906E1Q,
     108    P9 =  5.448925479898460003048760932274085300103E-1Q,
     109    P10 = -3.315886001095605268470690485170092986337E-1Q,
     110    Q0 = -1.958219113487162405143608843774587557016E0Q,
     111    Q1 =  2.614577866876185080678907676023269360520E1Q,
     112    Q2 = -9.990858606464150981009763389881793660938E1Q,
     113    Q3 =  1.443958741356995763628660823395334281596E2Q,
     114    Q4 = -3.206441012484232867657763518369723873129E1Q,
     115    Q5 = -1.048560885341833443564920145642588991492E2Q,
     116    Q6 =  6.745883931909770880159915641984874746358E1Q,
     117    Q7 =  1.806809656342804436118449982647641392951E1Q,
     118    Q8 = -1.770150690652438294290020775359580915464E1Q,
     119    Q9 = -5.659156469628629327045433069052560211164E-1Q,
     120    /* 1.000000000000000000000000000000000000000E0 */
     121  
     122    acosr4375 = 1.1179797320499710475919903296900511518755E0Q,
     123    pimacosr4375 = 2.0236129215398221908706530535894517323217E0Q,
     124  
     125    /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
     126       0 <= x <= 0.5
     127       peak relative error 1.9e-35  */
     128    pS0 = -8.358099012470680544198472400254596543711E2Q,
     129    pS1 =  3.674973957689619490312782828051860366493E3Q,
     130    pS2 = -6.730729094812979665807581609853656623219E3Q,
     131    pS3 =  6.643843795209060298375552684423454077633E3Q,
     132    pS4 = -3.817341990928606692235481812252049415993E3Q,
     133    pS5 =  1.284635388402653715636722822195716476156E3Q,
     134    pS6 = -2.410736125231549204856567737329112037867E2Q,
     135    pS7 =  2.219191969382402856557594215833622156220E1Q,
     136    pS8 = -7.249056260830627156600112195061001036533E-1Q,
     137    pS9 =  1.055923570937755300061509030361395604448E-3Q,
     138  
     139    qS0 = -5.014859407482408326519083440151745519205E3Q,
     140    qS1 =  2.430653047950480068881028451580393430537E4Q,
     141    qS2 = -4.997904737193653607449250593976069726962E4Q,
     142    qS3 =  5.675712336110456923807959930107347511086E4Q,
     143    qS4 = -3.881523118339661268482937768522572588022E4Q,
     144    qS5 =  1.634202194895541569749717032234510811216E4Q,
     145    qS6 = -4.151452662440709301601820849901296953752E3Q,
     146    qS7 =  5.956050864057192019085175976175695342168E2Q,
     147    qS8 = -4.175375777334867025769346564600396877176E1Q;
     148    /* 1.000000000000000000000000000000000000000E0 */
     149  
     150  __float128
     151  acosq (__float128 x)
     152  {
     153    __float128 z, r, w, p, q, s, t, f2;
     154    int32_t ix, sign;
     155    ieee854_float128 u;
     156  
     157    u.value = x;
     158    sign = u.words32.w0;
     159    ix = sign & 0x7fffffff;
     160    u.words32.w0 = ix;		/* |x| */
     161    if (ix >= 0x3fff0000)		/* |x| >= 1 */
     162      {
     163        if (ix == 0x3fff0000
     164  	  && (u.words32.w1 | u.words32.w2 | u.words32.w3) == 0)
     165  	{			/* |x| == 1 */
     166  	  if ((sign & 0x80000000) == 0)
     167  	    return 0.0;		/* acos(1) = 0  */
     168  	  else
     169  	    return (2.0 * pio2_hi) + (2.0 * pio2_lo);	/* acos(-1)= pi */
     170  	}
     171        return (x - x) / (x - x);	/* acos(|x| > 1) is NaN */
     172      }
     173    else if (ix < 0x3ffe0000)	/* |x| < 0.5 */
     174      {
     175        if (ix < 0x3f8e0000)	/* |x| < 2**-113 */
     176  	return pio2_hi + pio2_lo;
     177        if (ix < 0x3ffde000)	/* |x| < .4375 */
     178  	{
     179  	  /* Arcsine of x.  */
     180  	  z = x * x;
     181  	  p = (((((((((pS9 * z
     182  		       + pS8) * z
     183  		      + pS7) * z
     184  		     + pS6) * z
     185  		    + pS5) * z
     186  		   + pS4) * z
     187  		  + pS3) * z
     188  		 + pS2) * z
     189  		+ pS1) * z
     190  	       + pS0) * z;
     191  	  q = (((((((( z
     192  		       + qS8) * z
     193  		     + qS7) * z
     194  		    + qS6) * z
     195  		   + qS5) * z
     196  		  + qS4) * z
     197  		 + qS3) * z
     198  		+ qS2) * z
     199  	       + qS1) * z
     200  	    + qS0;
     201  	  r = x + x * p / q;
     202  	  z = pio2_hi - (r - pio2_lo);
     203  	  return z;
     204  	}
     205        /* .4375 <= |x| < .5 */
     206        t = u.value - 0.4375Q;
     207        p = ((((((((((P10 * t
     208  		    + P9) * t
     209  		   + P8) * t
     210  		  + P7) * t
     211  		 + P6) * t
     212  		+ P5) * t
     213  	       + P4) * t
     214  	      + P3) * t
     215  	     + P2) * t
     216  	    + P1) * t
     217  	   + P0) * t;
     218  
     219        q = (((((((((t
     220  		   + Q9) * t
     221  		  + Q8) * t
     222  		 + Q7) * t
     223  		+ Q6) * t
     224  	       + Q5) * t
     225  	      + Q4) * t
     226  	     + Q3) * t
     227  	    + Q2) * t
     228  	   + Q1) * t
     229  	+ Q0;
     230        r = p / q;
     231        if (sign & 0x80000000)
     232  	r = pimacosr4375 - r;
     233        else
     234  	r = acosr4375 + r;
     235        return r;
     236      }
     237    else if (ix < 0x3ffe4000)	/* |x| < 0.625 */
     238      {
     239        t = u.value - 0.5625Q;
     240        p = ((((((((((rS10 * t
     241  		    + rS9) * t
     242  		   + rS8) * t
     243  		  + rS7) * t
     244  		 + rS6) * t
     245  		+ rS5) * t
     246  	       + rS4) * t
     247  	      + rS3) * t
     248  	     + rS2) * t
     249  	    + rS1) * t
     250  	   + rS0) * t;
     251  
     252        q = (((((((((t
     253  		   + sS9) * t
     254  		  + sS8) * t
     255  		 + sS7) * t
     256  		+ sS6) * t
     257  	       + sS5) * t
     258  	      + sS4) * t
     259  	     + sS3) * t
     260  	    + sS2) * t
     261  	   + sS1) * t
     262  	+ sS0;
     263        if (sign & 0x80000000)
     264  	r = pimacosr5625 - p / q;
     265        else
     266  	r = acosr5625 + p / q;
     267        return r;
     268      }
     269    else
     270      {				/* |x| >= .625 */
     271        z = (one - u.value) * 0.5;
     272        s = sqrtq (z);
     273        /* Compute an extended precision square root from
     274  	 the Newton iteration  s -> 0.5 * (s + z / s).
     275  	 The change w from s to the improved value is
     276  	    w = 0.5 * (s + z / s) - s  = (s^2 + z)/2s - s = (z - s^2)/2s.
     277  	  Express s = f1 + f2 where f1 * f1 is exactly representable.
     278  	  w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
     279  	  s + w has extended precision.  */
     280        u.value = s;
     281        u.words32.w2 = 0;
     282        u.words32.w3 = 0;
     283        f2 = s - u.value;
     284        w = z - u.value * u.value;
     285        w = w - 2.0 * u.value * f2;
     286        w = w - f2 * f2;
     287        w = w / (2.0 * s);
     288        /* Arcsine of s.  */
     289        p = (((((((((pS9 * z
     290  		   + pS8) * z
     291  		  + pS7) * z
     292  		 + pS6) * z
     293  		+ pS5) * z
     294  	       + pS4) * z
     295  	      + pS3) * z
     296  	     + pS2) * z
     297  	    + pS1) * z
     298  	   + pS0) * z;
     299        q = (((((((( z
     300  		   + qS8) * z
     301  		 + qS7) * z
     302  		+ qS6) * z
     303  	       + qS5) * z
     304  	      + qS4) * z
     305  	     + qS3) * z
     306  	    + qS2) * z
     307  	   + qS1) * z
     308  	+ qS0;
     309        r = s + (w + s * p / q);
     310  
     311        if (sign & 0x80000000)
     312  	w = pio2_hi + (pio2_lo - r);
     313        else
     314  	w = r;
     315        return 2.0 * w;
     316      }
     317  }