1  /* @(#)er_lgamma.c 5.1 93/09/24 */
       2  /*
       3   * ====================================================
       4   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       5   *
       6   * Developed at SunPro, a Sun Microsystems, Inc. business.
       7   * Permission to use, copy, modify, and distribute this
       8   * software is freely granted, provided that this notice
       9   * is preserved.
      10   * ====================================================
      11   */
      12  
      13  /* __ieee754_lgamma_r(x, signgamp)
      14   * Reentrant version of the logarithm of the Gamma function
      15   * with user provide pointer for the sign of Gamma(x).
      16   *
      17   * Method:
      18   *   1. Argument Reduction for 0 < x <= 8
      19   *	Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
      20   *	reduce x to a number in [1.5,2.5] by
      21   *		lgamma(1+s) = log(s) + lgamma(s)
      22   *	for example,
      23   *		lgamma(7.3) = log(6.3) + lgamma(6.3)
      24   *			    = log(6.3*5.3) + lgamma(5.3)
      25   *			    = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
      26   *   2. Polynomial approximation of lgamma around its
      27   *	minimum ymin=1.461632144968362245 to maintain monotonicity.
      28   *	On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
      29   *		Let z = x-ymin;
      30   *		lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
      31   *	where
      32   *		poly(z) is a 14 degree polynomial.
      33   *   2. Rational approximation in the primary interval [2,3]
      34   *	We use the following approximation:
      35   *		s = x-2.0;
      36   *		lgamma(x) = 0.5*s + s*P(s)/Q(s)
      37   *	with accuracy
      38   *		|P/Q - (lgamma(x)-0.5s)| < 2**-61.71
      39   *	Our algorithms are based on the following observation
      40   *
      41   *                             zeta(2)-1    2    zeta(3)-1    3
      42   * lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
      43   *                                 2                 3
      44   *
      45   *	where Euler = 0.5771... is the Euler constant, which is very
      46   *	close to 0.5.
      47   *
      48   *   3. For x>=8, we have
      49   *	lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
      50   *	(better formula:
      51   *	   lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
      52   *	Let z = 1/x, then we approximation
      53   *		f(z) = lgamma(x) - (x-0.5)(log(x)-1)
      54   *	by
      55   *				    3       5             11
      56   *		w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
      57   *	where
      58   *		|w - f(z)| < 2**-58.74
      59   *
      60   *   4. For negative x, since (G is gamma function)
      61   *		-x*G(-x)*G(x) = pi/sin(pi*x),
      62   *	we have
      63   *		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
      64   *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
      65   *	Hence, for x<0, signgam = sign(sin(pi*x)) and
      66   *		lgamma(x) = log(|Gamma(x)|)
      67   *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
      68   *	Note: one should avoid compute pi*(-x) directly in the
      69   *	      computation of sin(pi*(-x)).
      70   *
      71   *   5. Special Cases
      72   *		lgamma(2+s) ~ s*(1-Euler) for tiny s
      73   *		lgamma(1)=lgamma(2)=0
      74   *		lgamma(x) ~ -log(x) for tiny x
      75   *		lgamma(0) = lgamma(inf) = inf
      76   *		lgamma(-integer) = +-inf
      77   *
      78   */
      79  
      80  #include <math.h>
      81  #include <math-narrow-eval.h>
      82  #include <math_private.h>
      83  #include <libc-diag.h>
      84  #include <libm-alias-finite.h>
      85  
      86  static const double
      87  two52=  4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
      88  half=  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
      89  one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
      90  pi  =  3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
      91  a0  =  7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
      92  a1  =  3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
      93  a2  =  6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
      94  a3  =  2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
      95  a4  =  7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
      96  a5  =  2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
      97  a6  =  1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
      98  a7  =  5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
      99  a8  =  2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
     100  a9  =  1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
     101  a10 =  2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
     102  a11 =  4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
     103  tc  =  1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
     104  tf  = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
     105  /* tt = -(tail of tf) */
     106  tt  = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
     107  t0  =  4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
     108  t1  = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
     109  t2  =  6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
     110  t3  = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
     111  t4  =  1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
     112  t5  = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
     113  t6  =  6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
     114  t7  = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
     115  t8  =  2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
     116  t9  = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
     117  t10 =  8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
     118  t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
     119  t12 =  3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
     120  t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
     121  t14 =  3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
     122  u0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
     123  u1  =  6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
     124  u2  =  1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
     125  u3  =  9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
     126  u4  =  2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
     127  u5  =  1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
     128  v1  =  2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
     129  v2  =  2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
     130  v3  =  7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
     131  v4  =  1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
     132  v5  =  3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
     133  s0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
     134  s1  =  2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
     135  s2  =  3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
     136  s3  =  1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
     137  s4  =  2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
     138  s5  =  1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
     139  s6  =  3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
     140  r1  =  1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
     141  r2  =  7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
     142  r3  =  1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
     143  r4  =  1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
     144  r5  =  7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
     145  r6  =  7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
     146  w0  =  4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
     147  w1  =  8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
     148  w2  = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
     149  w3  =  7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
     150  w4  = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
     151  w5  =  8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
     152  w6  = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
     153  
     154  static const double zero=  0.00000000000000000000e+00;
     155  
     156  static double
     157  sin_pi(double x)
     158  {
     159  	double y,z;
     160  	int n,ix;
     161  
     162  	GET_HIGH_WORD(ix,x);
     163  	ix &= 0x7fffffff;
     164  
     165  	if(ix<0x3fd00000) return __sin(pi*x);
     166  	y = -x;		/* x is assume negative */
     167  
     168      /*
     169       * argument reduction, make sure inexact flag not raised if input
     170       * is an integer
     171       */
     172  	z = floor(y);
     173  	if(z!=y) {				/* inexact anyway */
     174  	    y  *= 0.5;
     175  	    y   = 2.0*(y - floor(y));		/* y = |x| mod 2.0 */
     176  	    n   = (int) (y*4.0);
     177  	} else {
     178  	    if(ix>=0x43400000) {
     179  		y = zero; n = 0;                 /* y must be even */
     180  	    } else {
     181  		if(ix<0x43300000) z = y+two52;	/* exact */
     182  		GET_LOW_WORD(n,z);
     183  		n &= 1;
     184  		y  = n;
     185  		n<<= 2;
     186  	    }
     187  	}
     188  	switch (n) {
     189  	    case 0:   y =  __sin(pi*y); break;
     190  	    case 1:
     191  	    case 2:   y =  __cos(pi*(0.5-y)); break;
     192  	    case 3:
     193  	    case 4:   y =  __sin(pi*(one-y)); break;
     194  	    case 5:
     195  	    case 6:   y = -__cos(pi*(y-1.5)); break;
     196  	    default:  y =  __sin(pi*(y-2.0)); break;
     197  	    }
     198  	return -y;
     199  }
     200  
     201  
     202  double
     203  __ieee754_lgamma_r(double x, int *signgamp)
     204  {
     205  	double t,y,z,nadj,p,p1,p2,p3,q,r,w;
     206  	int i,hx,lx,ix;
     207  
     208  	EXTRACT_WORDS(hx,lx,x);
     209  
     210      /* purge off +-inf, NaN, +-0, and negative arguments */
     211  	*signgamp = 1;
     212  	ix = hx&0x7fffffff;
     213  	if(__builtin_expect(ix>=0x7ff00000, 0)) return x*x;
     214  	if(__builtin_expect((ix|lx)==0, 0))
     215  	  {
     216  	    if (hx < 0)
     217  	      *signgamp = -1;
     218  	    return one/fabs(x);
     219  	  }
     220  	if(__builtin_expect(ix<0x3b900000, 0)) {
     221  	    /* |x|<2**-70, return -log(|x|) */
     222  	    if(hx<0) {
     223  		*signgamp = -1;
     224  		return -__ieee754_log(-x);
     225  	    } else return -__ieee754_log(x);
     226  	}
     227  	if(hx<0) {
     228  	    if(__builtin_expect(ix>=0x43300000, 0))
     229  		/* |x|>=2**52, must be -integer */
     230  		return fabs (x)/zero;
     231  	    if (x < -2.0 && x > -28.0)
     232  		return __lgamma_neg (x, signgamp);
     233  	    t = sin_pi(x);
     234  	    if(t==zero) return one/fabsf(t); /* -integer */
     235  	    nadj = __ieee754_log(pi/fabs(t*x));
     236  	    if(t<zero) *signgamp = -1;
     237  	    x = -x;
     238  	}
     239  
     240      /* purge off 1 and 2 */
     241  	if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
     242      /* for x < 2.0 */
     243  	else if(ix<0x40000000) {
     244  	    if(ix<=0x3feccccc) {	/* lgamma(x) = lgamma(x+1)-log(x) */
     245  		r = -__ieee754_log(x);
     246  		if(ix>=0x3FE76944) {y = one-x; i= 0;}
     247  		else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
     248  		else {y = x; i=2;}
     249  	    } else {
     250  		r = zero;
     251  		if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
     252  		else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
     253  		else {y=x-one;i=2;}
     254  	    }
     255  	    switch(i) {
     256  	      case 0:
     257  		z = y*y;
     258  		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
     259  		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
     260  		p  = y*p1+p2;
     261  		r  += (p-0.5*y); break;
     262  	      case 1:
     263  		z = y*y;
     264  		w = z*y;
     265  		p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));	/* parallel comp */
     266  		p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
     267  		p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
     268  		p  = z*p1-(tt-w*(p2+y*p3));
     269  		r += (tf + p); break;
     270  	      case 2:
     271  		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
     272  		p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
     273  		r += (-0.5*y + p1/p2);
     274  	    }
     275  	}
     276  	else if(ix<0x40200000) {			/* x < 8.0 */
     277  	    i = (int)x;
     278  	    t = zero;
     279  	    y = x-(double)i;
     280  	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
     281  	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
     282  	    r = half*y+p/q;
     283  	    z = one;	/* lgamma(1+s) = log(s) + lgamma(s) */
     284  	    switch(i) {
     285  	    case 7: z *= (y+6.0);	/* FALLTHRU */
     286  	    case 6: z *= (y+5.0);	/* FALLTHRU */
     287  	    case 5: z *= (y+4.0);	/* FALLTHRU */
     288  	    case 4: z *= (y+3.0);	/* FALLTHRU */
     289  	    case 3: z *= (y+2.0);	/* FALLTHRU */
     290  		    r += __ieee754_log(z); break;
     291  	    }
     292      /* 8.0 <= x < 2**58 */
     293  	} else if (ix < 0x43900000) {
     294  	    t = __ieee754_log(x);
     295  	    z = one/x;
     296  	    y = z*z;
     297  	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
     298  	    r = (x-half)*(t-one)+w;
     299  	} else
     300      /* 2**58 <= x <= inf */
     301  	    r =  math_narrow_eval (x*(__ieee754_log(x)-one));
     302  	/* NADJ is set for negative arguments but not otherwise,
     303  	   resulting in warnings that it may be used uninitialized
     304  	   although in the cases where it is used it has always been
     305  	   set.  */
     306  	DIAG_PUSH_NEEDS_COMMENT;
     307  	DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
     308  	if(hx<0) r = nadj - r;
     309  	DIAG_POP_NEEDS_COMMENT;
     310  	return r;
     311  }
     312  libm_alias_finite (__ieee754_lgamma_r, __lgamma_r)