1  /*
       2   * ====================================================
       3   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       4   *
       5   * Developed at SunPro, a Sun Microsystems, Inc. business.
       6   * Permission to use, copy, modify, and distribute this
       7   * software is freely granted, provided that this notice
       8   * is preserved.
       9   * ====================================================
      10   */
      11  
      12  /* Long double expansions are
      13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
      14    and are incorporated herein by permission of the author.  The author
      15    reserves the right to distribute this material elsewhere under different
      16    copying permissions.  These modifications are distributed here under
      17    the following terms:
      18  
      19      This library is free software; you can redistribute it and/or
      20      modify it under the terms of the GNU Lesser General Public
      21      License as published by the Free Software Foundation; either
      22      version 2.1 of the License, or (at your option) any later version.
      23  
      24      This library is distributed in the hope that it will be useful,
      25      but WITHOUT ANY WARRANTY; without even the implied warranty of
      26      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      27      Lesser General Public License for more details.
      28  
      29      You should have received a copy of the GNU Lesser General Public
      30      License along with this library; if not, see
      31      <https://www.gnu.org/licenses/>.  */
      32  
      33  /* __ieee754_lgammal_r(x, signgamp)
      34   * Reentrant version of the logarithm of the Gamma function
      35   * with user provide pointer for the sign of Gamma(x).
      36   *
      37   * Method:
      38   *   1. Argument Reduction for 0 < x <= 8
      39   *	Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
      40   *	reduce x to a number in [1.5,2.5] by
      41   *		lgamma(1+s) = log(s) + lgamma(s)
      42   *	for example,
      43   *		lgamma(7.3) = log(6.3) + lgamma(6.3)
      44   *			    = log(6.3*5.3) + lgamma(5.3)
      45   *			    = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
      46   *   2. Polynomial approximation of lgamma around its
      47   *	minimum ymin=1.461632144968362245 to maintain monotonicity.
      48   *	On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
      49   *		Let z = x-ymin;
      50   *		lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
      51   *   2. Rational approximation in the primary interval [2,3]
      52   *	We use the following approximation:
      53   *		s = x-2.0;
      54   *		lgamma(x) = 0.5*s + s*P(s)/Q(s)
      55   *	Our algorithms are based on the following observation
      56   *
      57   *                             zeta(2)-1    2    zeta(3)-1    3
      58   * lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
      59   *                                 2                 3
      60   *
      61   *	where Euler = 0.5771... is the Euler constant, which is very
      62   *	close to 0.5.
      63   *
      64   *   3. For x>=8, we have
      65   *	lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
      66   *	(better formula:
      67   *	   lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
      68   *	Let z = 1/x, then we approximation
      69   *		f(z) = lgamma(x) - (x-0.5)(log(x)-1)
      70   *	by
      71   *				    3       5             11
      72   *		w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
      73   *
      74   *   4. For negative x, since (G is gamma function)
      75   *		-x*G(-x)*G(x) = pi/sin(pi*x),
      76   *	we have
      77   *		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
      78   *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
      79   *	Hence, for x<0, signgam = sign(sin(pi*x)) and
      80   *		lgamma(x) = log(|Gamma(x)|)
      81   *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
      82   *	Note: one should avoid compute pi*(-x) directly in the
      83   *	      computation of sin(pi*(-x)).
      84   *
      85   *   5. Special Cases
      86   *		lgamma(2+s) ~ s*(1-Euler) for tiny s
      87   *		lgamma(1)=lgamma(2)=0
      88   *		lgamma(x) ~ -log(x) for tiny x
      89   *		lgamma(0) = lgamma(inf) = inf
      90   *		lgamma(-integer) = +-inf
      91   *
      92   */
      93  
      94  #include <math.h>
      95  #include <math_private.h>
      96  #include <libc-diag.h>
      97  #include <libm-alias-finite.h>
      98  
      99  static const long double
     100    half = 0.5L,
     101    one = 1.0L,
     102    pi = 3.14159265358979323846264L,
     103    two63 = 9.223372036854775808e18L,
     104  
     105    /* lgam(1+x) = 0.5 x + x a(x)/b(x)
     106       -0.268402099609375 <= x <= 0
     107       peak relative error 6.6e-22 */
     108    a0 = -6.343246574721079391729402781192128239938E2L,
     109    a1 =  1.856560238672465796768677717168371401378E3L,
     110    a2 =  2.404733102163746263689288466865843408429E3L,
     111    a3 =  8.804188795790383497379532868917517596322E2L,
     112    a4 =  1.135361354097447729740103745999661157426E2L,
     113    a5 =  3.766956539107615557608581581190400021285E0L,
     114  
     115    b0 =  8.214973713960928795704317259806842490498E3L,
     116    b1 =  1.026343508841367384879065363925870888012E4L,
     117    b2 =  4.553337477045763320522762343132210919277E3L,
     118    b3 =  8.506975785032585797446253359230031874803E2L,
     119    b4 =  6.042447899703295436820744186992189445813E1L,
     120    /* b5 =  1.000000000000000000000000000000000000000E0 */
     121  
     122  
     123    tc =  1.4616321449683623412626595423257213284682E0L,
     124    tf = -1.2148629053584961146050602565082954242826E-1,/* double precision */
     125  /* tt = (tail of tf), i.e. tf + tt has extended precision. */
     126    tt = 3.3649914684731379602768989080467587736363E-18L,
     127    /* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
     128  -1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */
     129  
     130    /* lgam (x + tc) = tf + tt + x g(x)/h(x)
     131       - 0.230003726999612341262659542325721328468 <= x
     132       <= 0.2699962730003876587373404576742786715318
     133       peak relative error 2.1e-21 */
     134    g0 = 3.645529916721223331888305293534095553827E-18L,
     135    g1 = 5.126654642791082497002594216163574795690E3L,
     136    g2 = 8.828603575854624811911631336122070070327E3L,
     137    g3 = 5.464186426932117031234820886525701595203E3L,
     138    g4 = 1.455427403530884193180776558102868592293E3L,
     139    g5 = 1.541735456969245924860307497029155838446E2L,
     140    g6 = 4.335498275274822298341872707453445815118E0L,
     141  
     142    h0 = 1.059584930106085509696730443974495979641E4L,
     143    h1 =  2.147921653490043010629481226937850618860E4L,
     144    h2 = 1.643014770044524804175197151958100656728E4L,
     145    h3 =  5.869021995186925517228323497501767586078E3L,
     146    h4 =  9.764244777714344488787381271643502742293E2L,
     147    h5 =  6.442485441570592541741092969581997002349E1L,
     148    /* h6 = 1.000000000000000000000000000000000000000E0 */
     149  
     150  
     151    /* lgam (x+1) = -0.5 x + x u(x)/v(x)
     152       -0.100006103515625 <= x <= 0.231639862060546875
     153       peak relative error 1.3e-21 */
     154    u0 = -8.886217500092090678492242071879342025627E1L,
     155    u1 =  6.840109978129177639438792958320783599310E2L,
     156    u2 =  2.042626104514127267855588786511809932433E3L,
     157    u3 =  1.911723903442667422201651063009856064275E3L,
     158    u4 =  7.447065275665887457628865263491667767695E2L,
     159    u5 =  1.132256494121790736268471016493103952637E2L,
     160    u6 =  4.484398885516614191003094714505960972894E0L,
     161  
     162    v0 =  1.150830924194461522996462401210374632929E3L,
     163    v1 =  3.399692260848747447377972081399737098610E3L,
     164    v2 =  3.786631705644460255229513563657226008015E3L,
     165    v3 =  1.966450123004478374557778781564114347876E3L,
     166    v4 =  4.741359068914069299837355438370682773122E2L,
     167    v5 =  4.508989649747184050907206782117647852364E1L,
     168    /* v6 =  1.000000000000000000000000000000000000000E0 */
     169  
     170  
     171    /* lgam (x+2) = .5 x + x s(x)/r(x)
     172       0 <= x <= 1
     173       peak relative error 7.2e-22 */
     174    s0 =  1.454726263410661942989109455292824853344E6L,
     175    s1 = -3.901428390086348447890408306153378922752E6L,
     176    s2 = -6.573568698209374121847873064292963089438E6L,
     177    s3 = -3.319055881485044417245964508099095984643E6L,
     178    s4 = -7.094891568758439227560184618114707107977E5L,
     179    s5 = -6.263426646464505837422314539808112478303E4L,
     180    s6 = -1.684926520999477529949915657519454051529E3L,
     181  
     182    r0 = -1.883978160734303518163008696712983134698E7L,
     183    r1 = -2.815206082812062064902202753264922306830E7L,
     184    r2 = -1.600245495251915899081846093343626358398E7L,
     185    r3 = -4.310526301881305003489257052083370058799E6L,
     186    r4 = -5.563807682263923279438235987186184968542E5L,
     187    r5 = -3.027734654434169996032905158145259713083E4L,
     188    r6 = -4.501995652861105629217250715790764371267E2L,
     189    /* r6 =  1.000000000000000000000000000000000000000E0 */
     190  
     191  
     192  /* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
     193     x >= 8
     194     Peak relative error 1.51e-21
     195     w0 = LS2PI - 0.5 */
     196    w0 =  4.189385332046727417803e-1L,
     197    w1 =  8.333333333333331447505E-2L,
     198    w2 = -2.777777777750349603440E-3L,
     199    w3 =  7.936507795855070755671E-4L,
     200    w4 = -5.952345851765688514613E-4L,
     201    w5 =  8.412723297322498080632E-4L,
     202    w6 = -1.880801938119376907179E-3L,
     203    w7 =  4.885026142432270781165E-3L;
     204  
     205  static const long double zero = 0.0L;
     206  
     207  static long double
     208  sin_pi (long double x)
     209  {
     210    long double y, z;
     211    int n, ix;
     212    uint32_t se, i0, i1;
     213  
     214    GET_LDOUBLE_WORDS (se, i0, i1, x);
     215    ix = se & 0x7fff;
     216    ix = (ix << 16) | (i0 >> 16);
     217    if (ix < 0x3ffd8000) /* 0.25 */
     218      return __sinl (pi * x);
     219    y = -x;			/* x is assume negative */
     220  
     221    /*
     222     * argument reduction, make sure inexact flag not raised if input
     223     * is an integer
     224     */
     225    z = floorl (y);
     226    if (z != y)
     227      {				/* inexact anyway */
     228        y  *= 0.5;
     229        y = 2.0*(y - floorl(y));		/* y = |x| mod 2.0 */
     230        n = (int) (y*4.0);
     231      }
     232    else
     233      {
     234        if (ix >= 0x403f8000)  /* 2^64 */
     235  	{
     236  	  y = zero; n = 0;                 /* y must be even */
     237  	}
     238        else
     239  	{
     240  	if (ix < 0x403e8000)  /* 2^63 */
     241  	  z = y + two63;	/* exact */
     242  	GET_LDOUBLE_WORDS (se, i0, i1, z);
     243  	n = i1 & 1;
     244  	y  = n;
     245  	n <<= 2;
     246        }
     247      }
     248  
     249    switch (n)
     250      {
     251      case 0:
     252        y = __sinl (pi * y);
     253        break;
     254      case 1:
     255      case 2:
     256        y = __cosl (pi * (half - y));
     257        break;
     258      case 3:
     259      case 4:
     260        y = __sinl (pi * (one - y));
     261        break;
     262      case 5:
     263      case 6:
     264        y = -__cosl (pi * (y - 1.5));
     265        break;
     266      default:
     267        y = __sinl (pi * (y - 2.0));
     268        break;
     269      }
     270    return -y;
     271  }
     272  
     273  
     274  long double
     275  __ieee754_lgammal_r (long double x, int *signgamp)
     276  {
     277    long double t, y, z, nadj, p, p1, p2, q, r, w;
     278    int i, ix;
     279    uint32_t se, i0, i1;
     280  
     281    *signgamp = 1;
     282    GET_LDOUBLE_WORDS (se, i0, i1, x);
     283    ix = se & 0x7fff;
     284  
     285    if (__builtin_expect((ix | i0 | i1) == 0, 0))
     286      {
     287        if (se & 0x8000)
     288  	*signgamp = -1;
     289        return one / fabsl (x);
     290      }
     291  
     292    ix = (ix << 16) | (i0 >> 16);
     293  
     294    /* purge off +-inf, NaN, +-0, and negative arguments */
     295    if (__builtin_expect(ix >= 0x7fff0000, 0))
     296      return x * x;
     297  
     298    if (__builtin_expect(ix < 0x3fc08000, 0)) /* 2^-63 */
     299      {				/* |x|<2**-63, return -log(|x|) */
     300        if (se & 0x8000)
     301  	{
     302  	  *signgamp = -1;
     303  	  return -__ieee754_logl (-x);
     304  	}
     305        else
     306  	return -__ieee754_logl (x);
     307      }
     308    if (se & 0x8000)
     309      {
     310        if (x < -2.0L && x > -33.0L)
     311  	return __lgamma_negl (x, signgamp);
     312        t = sin_pi (x);
     313        if (t == zero)
     314  	return one / fabsl (t);	/* -integer */
     315        nadj = __ieee754_logl (pi / fabsl (t * x));
     316        if (t < zero)
     317  	*signgamp = -1;
     318        x = -x;
     319      }
     320  
     321    /* purge off 1 and 2 */
     322    if ((((ix - 0x3fff8000) | i0 | i1) == 0)
     323        || (((ix - 0x40008000) | i0 | i1) == 0))
     324      r = 0;
     325    else if (ix < 0x40008000) /* 2.0 */
     326      {
     327        /* x < 2.0 */
     328        if (ix <= 0x3ffee666) /* 8.99993896484375e-1 */
     329  	{
     330  	  /* lgamma(x) = lgamma(x+1) - log(x) */
     331  	  r = -__ieee754_logl (x);
     332  	  if (ix >= 0x3ffebb4a) /* 7.31597900390625e-1 */
     333  	    {
     334  	      y = x - one;
     335  	      i = 0;
     336  	    }
     337  	  else if (ix >= 0x3ffced33)/* 2.31639862060546875e-1 */
     338  	    {
     339  	      y = x - (tc - one);
     340  	      i = 1;
     341  	    }
     342  	  else
     343  	    {
     344  	      /* x < 0.23 */
     345  	      y = x;
     346  	      i = 2;
     347  	    }
     348  	}
     349        else
     350  	{
     351  	  r = zero;
     352  	  if (ix >= 0x3fffdda6) /* 1.73162841796875 */
     353  	    {
     354  	      /* [1.7316,2] */
     355  	      y = x - 2.0;
     356  	      i = 0;
     357  	    }
     358  	  else if (ix >= 0x3fff9da6)/* 1.23162841796875 */
     359  	    {
     360  	      /* [1.23,1.73] */
     361  	      y = x - tc;
     362  	      i = 1;
     363  	    }
     364  	  else
     365  	    {
     366  	      /* [0.9, 1.23] */
     367  	      y = x - one;
     368  	      i = 2;
     369  	    }
     370  	}
     371        switch (i)
     372  	{
     373  	case 0:
     374  	  p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
     375  	  p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
     376  	  r += half * y + y * p1/p2;
     377  	  break;
     378  	case 1:
     379      p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
     380      p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
     381      p = tt + y * p1/p2;
     382  	  r += (tf + p);
     383  	  break;
     384  	case 2:
     385   p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
     386        p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
     387  	  r += (-half * y + p1 / p2);
     388  	}
     389      }
     390    else if (ix < 0x40028000) /* 8.0 */
     391      {
     392        /* x < 8.0 */
     393        i = (int) x;
     394        t = zero;
     395        y = x - (double) i;
     396    p = y *
     397       (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
     398    q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
     399        r = half * y + p / q;
     400        z = one;			/* lgamma(1+s) = log(s) + lgamma(s) */
     401        switch (i)
     402  	{
     403  	case 7:
     404  	  z *= (y + 6.0);	/* FALLTHRU */
     405  	case 6:
     406  	  z *= (y + 5.0);	/* FALLTHRU */
     407  	case 5:
     408  	  z *= (y + 4.0);	/* FALLTHRU */
     409  	case 4:
     410  	  z *= (y + 3.0);	/* FALLTHRU */
     411  	case 3:
     412  	  z *= (y + 2.0);	/* FALLTHRU */
     413  	  r += __ieee754_logl (z);
     414  	  break;
     415  	}
     416      }
     417    else if (ix < 0x40418000) /* 2^66 */
     418      {
     419        /* 8.0 <= x < 2**66 */
     420        t = __ieee754_logl (x);
     421        z = one / x;
     422        y = z * z;
     423        w = w0 + z * (w1
     424  	  + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
     425        r = (x - half) * (t - one) + w;
     426      }
     427    else
     428      /* 2**66 <= x <= inf */
     429      r = x * (__ieee754_logl (x) - one);
     430    /* NADJ is set for negative arguments but not otherwise, resulting
     431       in warnings that it may be used uninitialized although in the
     432       cases where it is used it has always been set.  */
     433    DIAG_PUSH_NEEDS_COMMENT;
     434    DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
     435    if (se & 0x8000)
     436      r = nadj - r;
     437    DIAG_POP_NEEDS_COMMENT;
     438    return r;
     439  }
     440  libm_alias_finite (__ieee754_lgammal_r, __lgammal_r)