1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /*
      13     Long double expansions are
      14     Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      15     and are incorporated herein by permission of the author.  The author
      16     reserves the right to distribute this material elsewhere under different
      17     copying permissions.  These modifications are distributed here under
      18     the following terms:
      19  
      20      This library is free software; you can redistribute it and/or
      21      modify it under the terms of the GNU Lesser General Public
      22      License as published by the Free Software Foundation; either
      23      version 2.1 of the License, or (at your option) any later version.
      24  
      25      This library is distributed in the hope that it will be useful,
      26      but WITHOUT ANY WARRANTY; without even the implied warranty of
      27      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      28      Lesser General Public License for more details.
      29  
      30      You should have received a copy of the GNU Lesser General Public
      31      License along with this library; if not, see
      32      <https://www.gnu.org/licenses/>.  */
      33  
      34  /* __ieee754_acosl(x)
      35   * Method :
      36   *      acos(x)  = pi/2 - asin(x)
      37   *      acos(-x) = pi/2 + asin(x)
      38   * For |x| <= 0.375
      39   *      acos(x) = pi/2 - asin(x)
      40   * Between .375 and .5 the approximation is
      41   *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
      42   * Between .5 and .625 the approximation is
      43   *      acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
      44   * For x > 0.625,
      45   *      acos(x) = 2 asin(sqrt((1-x)/2))
      46   *      computed with an extended precision square root in the leading term.
      47   * For x < -0.625
      48   *      acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
      49   *
      50   * Special cases:
      51   *      if x is NaN, return x itself;
      52   *      if |x|>1, return NaN with invalid signal.
      53   *
      54   * Functions needed: sqrtl.
      55   */
      56  
      57  #include <math.h>
      58  #include <math_private.h>
      59  #include <libm-alias-finite.h>
      60  
      61  static const long double
      62    one = 1.0L,
      63    pio2_hi = 1.5707963267948966192313216916397514420986L,
      64    pio2_lo = 4.3359050650618905123985220130216759843812E-35L,
      65  
      66    /* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
      67       -0.0625 <= x <= 0.0625
      68       peak relative error 3.3e-35  */
      69  
      70    rS0 =  5.619049346208901520945464704848780243887E0L,
      71    rS1 = -4.460504162777731472539175700169871920352E1L,
      72    rS2 =  1.317669505315409261479577040530751477488E2L,
      73    rS3 = -1.626532582423661989632442410808596009227E2L,
      74    rS4 =  3.144806644195158614904369445440583873264E1L,
      75    rS5 =  9.806674443470740708765165604769099559553E1L,
      76    rS6 = -5.708468492052010816555762842394927806920E1L,
      77    rS7 = -1.396540499232262112248553357962639431922E1L,
      78    rS8 =  1.126243289311910363001762058295832610344E1L,
      79    rS9 =  4.956179821329901954211277873774472383512E-1L,
      80    rS10 = -3.313227657082367169241333738391762525780E-1L,
      81  
      82    sS0 = -4.645814742084009935700221277307007679325E0L,
      83    sS1 =  3.879074822457694323970438316317961918430E1L,
      84    sS2 = -1.221986588013474694623973554726201001066E2L,
      85    sS3 =  1.658821150347718105012079876756201905822E2L,
      86    sS4 = -4.804379630977558197953176474426239748977E1L,
      87    sS5 = -1.004296417397316948114344573811562952793E2L,
      88    sS6 =  7.530281592861320234941101403870010111138E1L,
      89    sS7 =  1.270735595411673647119592092304357226607E1L,
      90    sS8 = -1.815144839646376500705105967064792930282E1L,
      91    sS9 = -7.821597334910963922204235247786840828217E-2L,
      92    /* 1.000000000000000000000000000000000000000E0 */
      93  
      94    acosr5625 = 9.7338991014954640492751132535550279812151E-1L,
      95    pimacosr5625 = 2.1682027434402468335351320579240000860757E0L,
      96  
      97    /* acos(0.4375 + x) = acos(0.4375) + x rS(x) / sS(x)
      98       -0.0625 <= x <= 0.0625
      99       peak relative error 2.1e-35  */
     100  
     101    P0 =  2.177690192235413635229046633751390484892E0L,
     102    P1 = -2.848698225706605746657192566166142909573E1L,
     103    P2 =  1.040076477655245590871244795403659880304E2L,
     104    P3 = -1.400087608918906358323551402881238180553E2L,
     105    P4 =  2.221047917671449176051896400503615543757E1L,
     106    P5 =  9.643714856395587663736110523917499638702E1L,
     107    P6 = -5.158406639829833829027457284942389079196E1L,
     108    P7 = -1.578651828337585944715290382181219741813E1L,
     109    P8 =  1.093632715903802870546857764647931045906E1L,
     110    P9 =  5.448925479898460003048760932274085300103E-1L,
     111    P10 = -3.315886001095605268470690485170092986337E-1L,
     112    Q0 = -1.958219113487162405143608843774587557016E0L,
     113    Q1 =  2.614577866876185080678907676023269360520E1L,
     114    Q2 = -9.990858606464150981009763389881793660938E1L,
     115    Q3 =  1.443958741356995763628660823395334281596E2L,
     116    Q4 = -3.206441012484232867657763518369723873129E1L,
     117    Q5 = -1.048560885341833443564920145642588991492E2L,
     118    Q6 =  6.745883931909770880159915641984874746358E1L,
     119    Q7 =  1.806809656342804436118449982647641392951E1L,
     120    Q8 = -1.770150690652438294290020775359580915464E1L,
     121    Q9 = -5.659156469628629327045433069052560211164E-1L,
     122    /* 1.000000000000000000000000000000000000000E0 */
     123  
     124    acosr4375 = 1.1179797320499710475919903296900511518755E0L,
     125    pimacosr4375 = 2.0236129215398221908706530535894517323217E0L,
     126  
     127    /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
     128       0 <= x <= 0.5
     129       peak relative error 1.9e-35  */
     130    pS0 = -8.358099012470680544198472400254596543711E2L,
     131    pS1 =  3.674973957689619490312782828051860366493E3L,
     132    pS2 = -6.730729094812979665807581609853656623219E3L,
     133    pS3 =  6.643843795209060298375552684423454077633E3L,
     134    pS4 = -3.817341990928606692235481812252049415993E3L,
     135    pS5 =  1.284635388402653715636722822195716476156E3L,
     136    pS6 = -2.410736125231549204856567737329112037867E2L,
     137    pS7 =  2.219191969382402856557594215833622156220E1L,
     138    pS8 = -7.249056260830627156600112195061001036533E-1L,
     139    pS9 =  1.055923570937755300061509030361395604448E-3L,
     140  
     141    qS0 = -5.014859407482408326519083440151745519205E3L,
     142    qS1 =  2.430653047950480068881028451580393430537E4L,
     143    qS2 = -4.997904737193653607449250593976069726962E4L,
     144    qS3 =  5.675712336110456923807959930107347511086E4L,
     145    qS4 = -3.881523118339661268482937768522572588022E4L,
     146    qS5 =  1.634202194895541569749717032234510811216E4L,
     147    qS6 = -4.151452662440709301601820849901296953752E3L,
     148    qS7 =  5.956050864057192019085175976175695342168E2L,
     149    qS8 = -4.175375777334867025769346564600396877176E1L;
     150    /* 1.000000000000000000000000000000000000000E0 */
     151  
     152  long double
     153  __ieee754_acosl (long double x)
     154  {
     155    long double a, z, r, w, p, q, s, t, f2;
     156  
     157    if (__glibc_unlikely (isnan (x)))
     158      return x + x;
     159    a = __builtin_fabsl (x);
     160    if (a == 1.0L)
     161      {
     162        if (x > 0.0L)
     163  	return 0.0;		/* acos(1) = 0  */
     164        else
     165  	return (2.0 * pio2_hi) + (2.0 * pio2_lo);	/* acos(-1)= pi */
     166      }
     167    else if (a > 1.0L)
     168      {
     169        return (x - x) / (x - x);	/* acos(|x| > 1) is NaN */
     170      }
     171    if (a < 0.5L)
     172      {
     173        if (a < 0x1p-106L)
     174  	return pio2_hi + pio2_lo;
     175        if (a < 0.4375L)
     176  	{
     177  	  /* Arcsine of x.  */
     178  	  z = x * x;
     179  	  p = (((((((((pS9 * z
     180  		       + pS8) * z
     181  		      + pS7) * z
     182  		     + pS6) * z
     183  		    + pS5) * z
     184  		   + pS4) * z
     185  		  + pS3) * z
     186  		 + pS2) * z
     187  		+ pS1) * z
     188  	       + pS0) * z;
     189  	  q = (((((((( z
     190  		       + qS8) * z
     191  		     + qS7) * z
     192  		    + qS6) * z
     193  		   + qS5) * z
     194  		  + qS4) * z
     195  		 + qS3) * z
     196  		+ qS2) * z
     197  	       + qS1) * z
     198  	    + qS0;
     199  	  r = x + x * p / q;
     200  	  z = pio2_hi - (r - pio2_lo);
     201  	  return z;
     202  	}
     203        /* .4375 <= |x| < .5 */
     204        t = a - 0.4375L;
     205        p = ((((((((((P10 * t
     206  		    + P9) * t
     207  		   + P8) * t
     208  		  + P7) * t
     209  		 + P6) * t
     210  		+ P5) * t
     211  	       + P4) * t
     212  	      + P3) * t
     213  	     + P2) * t
     214  	    + P1) * t
     215  	   + P0) * t;
     216  
     217        q = (((((((((t
     218  		   + Q9) * t
     219  		  + Q8) * t
     220  		 + Q7) * t
     221  		+ Q6) * t
     222  	       + Q5) * t
     223  	      + Q4) * t
     224  	     + Q3) * t
     225  	    + Q2) * t
     226  	   + Q1) * t
     227  	+ Q0;
     228        r = p / q;
     229        if (x < 0.0L)
     230  	r = pimacosr4375 - r;
     231        else
     232  	r = acosr4375 + r;
     233        return r;
     234      }
     235    else if (a < 0.625L)
     236      {
     237        t = a - 0.5625L;
     238        p = ((((((((((rS10 * t
     239  		    + rS9) * t
     240  		   + rS8) * t
     241  		  + rS7) * t
     242  		 + rS6) * t
     243  		+ rS5) * t
     244  	       + rS4) * t
     245  	      + rS3) * t
     246  	     + rS2) * t
     247  	    + rS1) * t
     248  	   + rS0) * t;
     249  
     250        q = (((((((((t
     251  		   + sS9) * t
     252  		  + sS8) * t
     253  		 + sS7) * t
     254  		+ sS6) * t
     255  	       + sS5) * t
     256  	      + sS4) * t
     257  	     + sS3) * t
     258  	    + sS2) * t
     259  	   + sS1) * t
     260  	+ sS0;
     261        if (x < 0.0L)
     262  	r = pimacosr5625 - p / q;
     263        else
     264  	r = acosr5625 + p / q;
     265        return r;
     266      }
     267    else
     268      {				/* |x| >= .625 */
     269        double shi, slo;
     270  
     271        z = (one - a) * 0.5;
     272        s = sqrtl (z);
     273        /* Compute an extended precision square root from
     274  	 the Newton iteration  s -> 0.5 * (s + z / s).
     275  	 The change w from s to the improved value is
     276  	    w = 0.5 * (s + z / s) - s  = (s^2 + z)/2s - s = (z - s^2)/2s.
     277  	  Express s = f1 + f2 where f1 * f1 is exactly representable.
     278  	  w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
     279  	  s + w has extended precision.  */
     280        ldbl_unpack (s, &shi, &slo);
     281        a = shi;
     282        f2 = slo;
     283        w = z - a * a;
     284        w = w - 2.0 * a * f2;
     285        w = w - f2 * f2;
     286        w = w / (2.0 * s);
     287        /* Arcsine of s.  */
     288        p = (((((((((pS9 * z
     289  		   + pS8) * z
     290  		  + pS7) * z
     291  		 + pS6) * z
     292  		+ pS5) * z
     293  	       + pS4) * z
     294  	      + pS3) * z
     295  	     + pS2) * z
     296  	    + pS1) * z
     297  	   + pS0) * z;
     298        q = (((((((( z
     299  		   + qS8) * z
     300  		 + qS7) * z
     301  		+ qS6) * z
     302  	       + qS5) * z
     303  	      + qS4) * z
     304  	     + qS3) * z
     305  	    + qS2) * z
     306  	   + qS1) * z
     307  	+ qS0;
     308        r = s + (w + s * p / q);
     309  
     310        if (x < 0.0L)
     311  	w = pio2_hi + (pio2_lo - r);
     312        else
     313  	w = r;
     314        return 2.0 * w;
     315      }
     316  }
     317  libm_alias_finite (__ieee754_acosl, __acosl)