1  /* mpfr_lngamma -- lngamma function
       2  
       3  Copyright 2005-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  /* given a precision p, return alpha, such that the argument reduction
      27     will use k = alpha*p*log(2).
      28  
      29     Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
      30     and the smallest value of alpha multiplied by the smallest working
      31     precision should be >= 4.
      32  */
      33  static void
      34  mpfr_gamma_alpha (mpfr_ptr s, mpfr_prec_t p)
      35  {
      36    MPFR_LOG_FUNC
      37      (("p=%Pd", p),
      38       ("s[%Pd]=%.*Rg", mpfr_get_prec (s), mpfr_log_prec, s));
      39  
      40    if (p <= 100)
      41      mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
      42    else if (p <= 500)
      43      mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
      44    else if (p <= 1000)
      45      mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
      46    else if (p <= 2000)
      47      mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
      48    else if (p <= 5000)
      49      mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
      50    else if (p <= 10000)
      51      mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
      52    else
      53      mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
      54  }
      55  
      56  #ifdef IS_GAMMA
      57  
      58  /* This function is called in case of intermediate overflow/underflow.
      59     The s1 and s2 arguments are temporary MPFR numbers, having the
      60     working precision. If the result could be determined, then the
      61     flags are updated via pexpo, y is set to the result, and the
      62     (non-zero) ternary value is returned. Otherwise 0 is returned
      63     in order to perform the next Ziv iteration. */
      64  static int
      65  mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
      66                  mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
      67  {
      68    mpfr_t t1, t2;
      69    int inex1, inex2, sign;
      70    MPFR_BLOCK_DECL (flags1);
      71    MPFR_BLOCK_DECL (flags2);
      72    MPFR_GROUP_DECL (group);
      73  
      74    MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
      75    MPFR_ASSERTN (inex1 != 0);
      76    /* s1 = RNDD(lngamma(x)), inexact */
      77    if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
      78      {
      79        if (MPFR_IS_POS (s1))
      80          {
      81            MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
      82            return mpfr_overflow (y, rnd, sign);
      83          }
      84        else
      85          {
      86            MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
      87            return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
      88          }
      89      }
      90  
      91    mpfr_set (s2, s1, MPFR_RNDN);     /* exact */
      92    mpfr_nextabove (s2);              /* v = RNDU(lngamma(z0)) */
      93  
      94    if (sign < 0)
      95      rnd = MPFR_INVERT_RND (rnd);  /* since the result with be negated */
      96    MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
      97    MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
      98    MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
      99    /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
     100       t2 is the rounding with mode 'rnd' of an upper bound, thus if both
     101       are equal, so is the wanted result. If t1 and t2 differ or the flags
     102       differ, at some point of Ziv's loop they should agree. */
     103    if (mpfr_equal_p (t1, t2) && flags1 == flags2)
     104      {
     105        MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
     106        mpfr_set4 (y, t1, MPFR_RNDN, sign);  /* exact */
     107        if (sign < 0)
     108          inex1 = - inex1;
     109        MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
     110      }
     111    else
     112      inex1 = 0;  /* couldn't determine the result */
     113    MPFR_GROUP_CLEAR (group);
     114  
     115    return inex1;
     116  }
     117  
     118  #else
     119  
     120  static int
     121  unit_bit (mpfr_srcptr x)
     122  {
     123    mpfr_exp_t expo;
     124    mpfr_prec_t prec;
     125    mp_limb_t x0;
     126  
     127    expo = MPFR_GET_EXP (x);
     128    if (expo <= 0)
     129      return 0;  /* |x| < 1 */
     130  
     131    prec = MPFR_PREC (x);
     132    if (expo > prec)
     133      return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
     134  
     135    /* Now, the unit bit is represented. */
     136  
     137    prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
     138    /* number of represented fractional bits (including the trailing 0's) */
     139  
     140    x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
     141    /* limb containing the unit bit */
     142  
     143    return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
     144  }
     145  
     146  #endif
     147  
     148  /* FIXME: There is an internal overflow when z is very large.
     149     Simple overflow detection with possible false negatives?
     150     For the particular cases near the overflow boundary,
     151     scaling by a power of two?
     152  */
     153  
     154  /* lngamma(x) = log(gamma(x)).
     155     We use formula [6.1.40] from Abramowitz&Stegun:
     156     lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
     157                + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
     158     According to [6.1.42], if the sum is truncated after m=n, the error
     159     R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
     160     where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
     161     For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
     162   */
     163  #ifdef IS_GAMMA
     164  #define GAMMA_FUNC mpfr_gamma_aux
     165  #else
     166  #define GAMMA_FUNC mpfr_lngamma_aux
     167  #endif
     168  
     169  static int
     170  GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
     171  {
     172    mpfr_prec_t precy, w; /* working precision */
     173    mpfr_t s, t, u, v, z;
     174    unsigned long m, k, maxm, l;
     175    int compared, inexact;
     176    mpfr_exp_t err_s, err_t;
     177    double d;
     178    MPFR_SAVE_EXPO_DECL (expo);
     179    MPFR_ZIV_DECL (loop);
     180  
     181    MPFR_LOG_FUNC
     182      (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (z0), mpfr_log_prec, z0, rnd),
     183       ("y[%Pd]=%.*Rg inexact=%d",
     184        mpfr_get_prec (y), mpfr_log_prec, y, inexact));
     185  
     186    compared = mpfr_cmp_ui (z0, 1);
     187  
     188    MPFR_SAVE_EXPO_MARK (expo);
     189  
     190  #ifndef IS_GAMMA /* lngamma or lgamma */
     191    if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
     192      {
     193        MPFR_SAVE_EXPO_FREE (expo);
     194        return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
     195      }
     196  
     197    /* Deal with very large inputs: according to [6.1.42], if we denote
     198       R_n(z) = lngamma(z) - (z-1/2)*log(z) + z - 1/2*log(2*Pi), we have
     199       |R_n(z)| <= B_2/2/z, thus for z >= 2 we have
     200       |lngamma(z) - [z*(log(z) - 1)]| < 1/2*log(z) + 1. */
     201    if (compared > 0 && MPFR_GET_EXP (z0) >= (mpfr_exp_t) MPFR_PREC(y) + 2)
     202      {
     203        /* Since PREC(y) >= 2, this ensures EXP(z0) >= 4, thus |z0| >= 8,
     204           thus 1/2*log(z0) + 1 < log(z0).
     205           Since the largest possible z is < 2^(2^62) on a 64-bit machine,
     206           the largest value of log(z) is 2^62*log(2.) < 3.2e18 < 2^62,
     207           thus if we use at least 62 bits of precision, then log(t)-1 will
     208           be exact */
     209        mpfr_init2 (t, MPFR_PREC(y) >= 52 ? MPFR_PREC(y) + 10 : 62);
     210        mpfr_log (t, z0, MPFR_RNDU); /* error < 1 ulp */
     211        inexact = mpfr_sub_ui (t, t, 1, MPFR_RNDU); /* err < 2 ulps, since the
     212                                                       exponent of t might have
     213                                                       decreased */
     214        MPFR_ASSERTD(inexact == 0);
     215        mpfr_mul (t, z0, t, MPFR_RNDU); /* err < 1+2*2=5 ulps according to
     216                                          "Generic error on multiplication"
     217                                          in algorithms.tex */
     218        if (MPFR_IS_INF(t))
     219          {
     220            mpfr_clear (t);
     221            MPFR_SAVE_EXPO_FREE (expo);
     222            inexact = mpfr_overflow (y, rnd, 1);
     223            return inexact;
     224          }
     225        if (MPFR_GET_EXP(t) - MPFR_PREC(t) >= 62)
     226          {
     227            /* then ulp(t) >= 2^62 > log(z0) thus the total error is bounded
     228               by 6 ulp(t) */
     229            if (MPFR_CAN_ROUND (t, MPFR_PREC(t) - 3, MPFR_PREC(y), rnd))
     230              {
     231                inexact = mpfr_set (y, t, rnd);
     232                mpfr_clear (t);
     233                MPFR_SAVE_EXPO_FREE (expo);
     234                return mpfr_check_range (y, inexact, rnd);
     235              }
     236          }
     237        mpfr_clear (t);
     238      }
     239  
     240    /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
     241       - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
     242    if (MPFR_GET_EXP (z0) <= - (mpfr_exp_t) MPFR_PREC(y))
     243      {
     244        mpfr_t l, h, g;
     245        int ok, inex1, inex2;
     246        mpfr_prec_t prec = MPFR_PREC(y) + 14;
     247        MPFR_ZIV_DECL (loop);
     248  
     249        MPFR_ZIV_INIT (loop, prec);
     250        do
     251          {
     252            mpfr_init2 (l, prec);
     253            if (MPFR_IS_POS(z0))
     254              {
     255                mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
     256                mpfr_init2 (h, MPFR_PREC(l));
     257              }
     258            else
     259              {
     260                mpfr_init2 (h, MPFR_PREC(z0));
     261                mpfr_neg (h, z0, MPFR_RNDN); /* exact */
     262                mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
     263                mpfr_set_prec (h, MPFR_PREC(l));
     264              }
     265            mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
     266            mpfr_set (h, l, MPFR_RNDD); /* exact */
     267            mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
     268                                   to mpfr_log */
     269            mpfr_init2 (g, MPFR_PREC(l));
     270            /* if z0>0, we need an upper approximation of Euler's constant
     271               for the left bound */
     272            mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
     273            mpfr_mul (g, g, z0, MPFR_RNDD);
     274            mpfr_sub (l, l, g, MPFR_RNDD);
     275            mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
     276            mpfr_mul (g, g, z0, MPFR_RNDU);
     277            mpfr_sub (h, h, g, MPFR_RNDD);
     278            mpfr_sqr (g, z0, MPFR_RNDU);
     279            mpfr_add (h, h, g, MPFR_RNDU);
     280            inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
     281            inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
     282            /* Caution: we not only need l = h, but both inexact flags should
     283               agree. Indeed, one of the inexact flags might be zero. In that
     284               case if we assume lngamma(z0) cannot be exact, the other flag
     285               should be correct. We are conservative here and request that both
     286               inexact flags agree. */
     287            ok = SAME_SIGN (inex1, inex2) && mpfr_equal_p (l, h);
     288            if (ok)
     289              mpfr_set (y, h, rnd); /* exact */
     290            mpfr_clear (l);
     291            mpfr_clear (h);
     292            mpfr_clear (g);
     293            if (ok)
     294              {
     295                MPFR_ZIV_FREE (loop);
     296                MPFR_SAVE_EXPO_FREE (expo);
     297                return mpfr_check_range (y, inex1, rnd);
     298              }
     299            /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
     300               if x ~ 2^(-n), then we have a n-bit approximation, thus
     301               we can try again with a working precision of n bits,
     302               especially when n >> PREC(y).
     303               Otherwise we would use the reflection formula evaluating x-1,
     304               which would need precision n. */
     305            MPFR_ZIV_NEXT (loop, prec);
     306          }
     307        while (prec <= - MPFR_GET_EXP (z0));
     308        MPFR_ZIV_FREE (loop);
     309      }
     310  #endif
     311  
     312    precy = MPFR_PREC(y);
     313  
     314    mpfr_init2 (s, MPFR_PREC_MIN);
     315    mpfr_init2 (t, MPFR_PREC_MIN);
     316    mpfr_init2 (u, MPFR_PREC_MIN);
     317    mpfr_init2 (v, MPFR_PREC_MIN);
     318    mpfr_init2 (z, MPFR_PREC_MIN);
     319  
     320    inexact = 0; /* 0 means: result y not set yet */
     321  
     322    if (compared < 0)
     323      {
     324        mpfr_exp_t err_u;
     325  
     326        /* use reflection formula:
     327           gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
     328           thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
     329  
     330        w = precy + MPFR_INT_CEIL_LOG2 (precy);
     331        w += MPFR_INT_CEIL_LOG2 (w) + 14;
     332        MPFR_ZIV_INIT (loop, w);
     333        while (1)
     334          {
     335            MPFR_ASSERTD(w >= 3);
     336            mpfr_set_prec (s, w);
     337            mpfr_set_prec (t, w);
     338            mpfr_set_prec (u, w);
     339            mpfr_set_prec (v, w);
     340            /* In the following, we write r for a real of absolute value
     341               at most 2^(-w). Different instances of r may represent different
     342               values. */
     343            mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
     344            mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
     345            mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
     346            /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
     347               We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
     348               Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
     349               the error on u is bounded by
     350               ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
     351               = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
     352            d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
     353            if (MPFR_IS_ZERO(u)) /* in that case the error on u is zero */
     354              err_u = 0;
     355            else
     356              err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
     357            err_u = (err_u >= 0) ? err_u + 1 : 0;
     358            /* now the error on u is bounded by 2^err_u ulps */
     359  
     360            mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
     361            err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
     362            mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
     363            /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
     364               <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
     365               <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
     366            err_s += 3 - MPFR_GET_EXP(s);
     367            err_s = (err_s >= 0) ? err_s + 1 : 0;
     368            /* the error on s is bounded by 2^err_s ulp(s), thus by
     369               2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
     370               Now n*2^(-w) can always be written |(1+r)^n-1| for some
     371               |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
     372               |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
     373               true value.
     374               In fact if ulp(s) <= ulp(S) the same inequality holds for
     375               |S| instead of |s| in the right hand side, i.e., we can
     376               write s = (1+r)^(2^(err_s+1)) * S.
     377               But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
     378               to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
     379               E = n*2^(-w) we have |s - S| <= E * |s|, thus
     380               |s - S| <= E/(1-E) * |S|.
     381               Now E/(1-E) is bounded by 2E as long as E<=1/2,
     382               and 2E can be written (1+r)^(2n)-1 as above.
     383            */
     384            err_s += 2; /* exponent of relative error */
     385  
     386            mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
     387            mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
     388            mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
     389            mpfr_abs (v, v, MPFR_RNDN);
     390            /* (1+r)^(3+2^err_s+1) */
     391            err_s = (err_s <= 1) ? 3 : err_s + 1;
     392            /* now (1+r)^M with M <= 2^err_s */
     393            mpfr_log (v, v, MPFR_RNDN);
     394            /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
     395               Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
     396               bounded by ulp(v)/2 + 2^(err_s+1-w). */
     397            if (err_s + 2 > w)
     398              {
     399                w += err_s + 2;
     400              }
     401            else
     402              {
     403                /* if v = 0 here, it was 1 before the call to mpfr_log,
     404                   thus the error on v was zero */
     405                if (!MPFR_IS_ZERO(v))
     406                  err_s += 1 - MPFR_GET_EXP(v);
     407                err_s = (err_s >= 0) ? err_s + 1 : 0;
     408                /* the error on v is bounded by 2^err_s ulps */
     409                err_u += MPFR_GET_EXP(u); /* absolute error on u */
     410                if (!MPFR_IS_ZERO(v)) /* same as above */
     411                  err_s += MPFR_GET_EXP(v); /* absolute error on v */
     412                mpfr_sub (s, v, u, MPFR_RNDN);
     413                /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
     414                   + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
     415                err_s = (err_s >= err_u) ? err_s : err_u;
     416                err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
     417                err_s = (err_s >= 0) ? err_s + 1 : 0;
     418                if (MPFR_CAN_ROUND (s, w - err_s, precy, rnd))
     419                  goto end;
     420              }
     421            MPFR_ZIV_NEXT (loop, w);
     422          }
     423        MPFR_ZIV_FREE (loop);
     424      }
     425  
     426    /* now z0 > 1 */
     427  
     428    MPFR_ASSERTD (compared > 0);
     429  
     430    /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
     431       so there is a cancellation of ~log(w) in the argument reconstruction */
     432    w = precy + MPFR_INT_CEIL_LOG2 (precy);
     433    w += MPFR_INT_CEIL_LOG2 (w) + 13;
     434    MPFR_ZIV_INIT (loop, w);
     435    while (1)
     436      {
     437        MPFR_ASSERTD (w >= 3);
     438  
     439        /* argument reduction: we compute gamma(z0 + k), where the series
     440           has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
     441           and we need k steps of argument reconstruction. Assuming k is large
     442           with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
     443           k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
     444           However, since the series is slightly more expensive to compute,
     445           the optimal value seems to be k ~ 0.25 * w experimentally (with
     446           caching of Bernoulli numbers).
     447           For only one computation of gamma with large precision, it is better
     448           to set k to a larger value, say k ~ w. */
     449        mpfr_set_prec (s, 53);
     450        mpfr_gamma_alpha (s, w);
     451        mpfr_set_ui_2exp (s, 4, -4, MPFR_RNDU);
     452        mpfr_mul_ui (s, s, w, MPFR_RNDU);
     453        if (mpfr_cmp (z0, s) < 0)
     454          {
     455            mpfr_sub (s, s, z0, MPFR_RNDU);
     456            k = mpfr_get_ui (s, MPFR_RNDU);
     457            if (k < 3)
     458              k = 3;
     459          }
     460        else
     461          k = 3;
     462  
     463        mpfr_set_prec (s, w);
     464        mpfr_set_prec (t, w);
     465        mpfr_set_prec (u, w);
     466        mpfr_set_prec (v, w);
     467        mpfr_set_prec (z, w);
     468  
     469        mpfr_add_ui (z, z0, k, MPFR_RNDN);
     470        /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
     471  
     472        /* z >= 4 ensures the relative error on log(z) is small,
     473           and also (z-1/2)*log(z)-z >= 0 */
     474        MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
     475  
     476        mpfr_log (s, z, MPFR_RNDN); /* log(z) */
     477        /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
     478           Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
     479           = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
     480           s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
     481        mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
     482        mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
     483        /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
     484           t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
     485        mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
     486        mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
     487        mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
     488        /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
     489  
     490        mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
     491  
     492        /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
     493        mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
     494        mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
     495        mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
     496  
     497        mpfr_sqr (u, u, MPFR_RNDN);        /* 1/z^2 * (1+u)^3 */
     498  
     499        /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
     500        maxm = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2 - 1);
     501  
     502        /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
     503  
     504        for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
     505          {
     506            mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
     507            if (m <= maxm)
     508              {
     509                mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
     510                mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
     511                mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
     512              }
     513            else
     514              {
     515                mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
     516                mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
     517                mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
     518                mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
     519                mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
     520                mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
     521              }
     522            /* (1+u)^(10m-8) */
     523            /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
     524            mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
     525            MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
     526            mpfr_add (s, s, v, MPFR_RNDN);
     527          }
     528        /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
     529        MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
     530  
     531        /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
     532           <= 1.46*u for u <= 2^(-3).
     533           We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
     534           for z >= 4, thus since the initial s >= 0.85, the different values of
     535           s differ by at most one binade, and the total rounding error on s
     536           in the for-loop is bounded by 2*(m-1)*ulp(final_s).
     537           The error coming from the v's is bounded by
     538           1.46*2^(-w) <= 2*ulp(final_s).
     539           Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
     540           <= (2m+47)*ulp(s).
     541           Taking into account the truncation error (which is bounded by the last
     542           term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
     543        */
     544  
     545        /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
     546        mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
     547        mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
     548        /* k >= 3 */
     549        mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
     550        l = 1;
     551  
     552  /* replace #if 1 by #if 0 for the naive argument reconstruction */
     553  #if 1
     554  
     555        /* We multiply by (z0+1)*(z0+2)*...*(z0+k-1) by blocks of j consecutive
     556           terms where j ~ sqrt(k).
     557           If we multiply naively by z0+1, then by z0+2, ..., then by z0+j,
     558           the multiplicative term for the rounding error is (1+u)^(2j).
     559           The multiplicative term is not larger when we multiply by
     560           Z[j] + c[j-1]*Z[j-1] + ... + c[2]*Z[2] + c[1]*z0 + c[0]
     561           with c[p] integers, and Z[p] = z0^p * (1+u)^(p-1).
     562           Note that all terms are positive.
     563           Indeed, since c[1] is exact, c[1]*z0 corresponds to (1+u),
     564           then c[1]*z0 + c[0] corresponds to (1+u)^2,
     565           c[2]*Z[2] + c[1]*z0 + c[0] to (1+u)^3, ...,
     566           c[j-1]*Z[j-1] + ... + c[0] to (1+u)^j,
     567           and Z[j] + c[j-1]*Z[j-1] + ... + c[1]*z0 + c[0] to (1+u)^(j+1).
     568           With the accumulation in t, we get (1+u)^(j+2) and j+2 <= 2j. */
     569        {
     570          unsigned long j, i, p;
     571          mpfr_t *Z;
     572          mpz_t *c;
     573          for (j = 2; (j + 1) * (j + 1) < k; j++);
     574          /* Z[i] stores z0^i for i <= j */
     575          Z = (mpfr_t *) mpfr_allocate_func ((j + 1) * sizeof (mpfr_t));
     576          for (i = 2; i <= j; i++)
     577            mpfr_init2 (Z[i], w);
     578          mpfr_sqr (Z[2], z0, MPFR_RNDN);
     579          for (i = 3; i <= j; i++)
     580            if ((i & 1) == 0)
     581              mpfr_sqr (Z[i], Z[i >> 1], MPFR_RNDN);
     582            else
     583              mpfr_mul (Z[i], Z[i-1], z0, MPFR_RNDN);
     584          c = (mpz_t *) mpfr_allocate_func ((j + 1) * sizeof (mpz_t));
     585          for (i = 0; i <= j; i++)
     586            mpz_init (c[i]);
     587          for (; l + j <= k; l += j)
     588            {
     589              /* c[i] is the coefficient of x^i in (x+l)*...*(x+l+j-1) */
     590              mpz_set_ui (c[0], 1);
     591              for (i = 0; i < j; i++)
     592                /* multiply (x+l)*(x+l+1)*...*(x+l+i-1) by x+l+i:
     593                   (b[i]*x^i + b[i-1]*x^(i-1) + ... + b[0])*(x+l+i) =
     594                   b[i]*x^(i+1) + (b[i-1]+(l+i)*b[i])*x^i + ...
     595                   + (b[0]+(l+i)*b[1])*x + i*b[0] */
     596                {
     597                  mpz_set (c[i+1], c[i]); /* b[i]*x^(i+1) */
     598                  for (p = i; p > 0; p--)
     599                    {
     600                      mpz_mul_ui (c[p], c[p], l + i);
     601                      mpz_add (c[p], c[p], c[p-1]); /* b[p-1]+(l+i)*b[p] */
     602                    }
     603                  mpz_mul_ui (c[0], c[0], l+i); /* i*b[0] */
     604                }
     605              /* now compute z0^j + c[j-1]*z0^(j-1) + ... + c[1]*z0 + c[0] */
     606              mpfr_set_z (u, c[0], MPFR_RNDN);
     607              for (i = 0; i < j; i++)
     608                {
     609                  mpfr_mul_z (z, (i == 0) ? z0 : Z[i+1], c[i+1], MPFR_RNDN);
     610                  mpfr_add (u, u, z, MPFR_RNDN);
     611                }
     612              mpfr_mul (t, t, u, MPFR_RNDN);
     613            }
     614          for (i = 0; i <= j; i++)
     615            mpz_clear (c[i]);
     616          mpfr_free_func (c, (j + 1) * sizeof (mpz_t));
     617          for (i = 2; i <= j; i++)
     618            mpfr_clear (Z[i]);
     619          mpfr_free_func (Z, (j + 1) * sizeof (mpfr_t));
     620        }
     621  #endif /* end of fast argument reconstruction */
     622  
     623        for (; l < k; l++)
     624          {
     625            mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
     626            mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
     627          }
     628        /* now t: (1+u)^(2k-1) */
     629        /* instead of computing log(sqrt(2*Pi)/t), we compute
     630           1/2*log(2*Pi/t^2), which trades a square root for a square */
     631        mpfr_sqr (t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
     632        mpfr_div (v, v, t, MPFR_RNDN);
     633        /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
     634  #ifdef IS_GAMMA
     635        err_s = MPFR_GET_EXP(s);
     636        mpfr_exp (s, s, MPFR_RNDN);
     637        /* If s is +Inf, we compute exp(lngamma(z0)). */
     638        if (mpfr_inf_p (s))
     639          {
     640            inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
     641            if (inexact)
     642              goto end0;
     643            else
     644              goto ziv_next;
     645          }
     646        /* before the exponential, we have s = s0 + h where
     647           |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
     648           For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
     649           |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
     650        /* d = 1.2 * (2.0 * (double) m + 48.0); */
     651        /* the error on s is bounded by d*2^err_s * 2^(-w) */
     652        mpfr_sqrt (t, v, MPFR_RNDN);
     653        /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
     654           thus t = sqrt(v0)*(1+u)^(2k+3/2). */
     655        mpfr_mul (s, s, t, MPFR_RNDN);
     656        /* the error on input s is bounded by (1+u)^(d*2^err_s),
     657           and that on t is (1+u)^(2k+3/2), thus the
     658           total error is (1+u)^(d*2^err_s+2k+5/2) */
     659        /* err_s += __gmpfr_ceil_log2 (d); */
     660        /* since d = 1.2 * (2m+48), ceil(log2(d)) = 2 + ceil(log2(0.6*m+14.4))
     661           <= 2 + ceil(log2(0.6*m+15)) */
     662        {
     663          unsigned long mm = (1 + m / 5) * 3; /* 0.6*m <= mm */
     664          err_s += 2 + __gmpfr_int_ceil_log2 (mm + 15);
     665        }
     666        err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
     667        err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
     668  #else
     669        mpfr_log (t, v, MPFR_RNDN);
     670        /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
     671           thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
     672           for |u| <= 2^(-3), the absolute error on log(v) is bounded by
     673           1.07*(4k+1)*u, and the rounding error by ulp(t). */
     674        mpfr_div_2ui (t, t, 1, MPFR_RNDN);
     675        /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
     676           We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
     677           since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
     678           Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
     679        err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
     680          __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
     681        err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
     682          __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
     683        mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
     684        /* the final error in ulp(s) is
     685           <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
     686           <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
     687           <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
     688        err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
     689        err_s += 1 - MPFR_GET_EXP(s);
     690  #endif
     691        if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
     692          break;
     693  #ifdef IS_GAMMA
     694      ziv_next:
     695  #endif
     696        MPFR_ZIV_NEXT (loop, w);
     697      }
     698  
     699  #ifdef IS_GAMMA
     700   end0:
     701  #endif
     702  
     703   end:
     704    if (inexact == 0)
     705      inexact = mpfr_set (y, s, rnd);
     706    MPFR_ZIV_FREE (loop);
     707  
     708    mpfr_clear (s);
     709    mpfr_clear (t);
     710    mpfr_clear (u);
     711    mpfr_clear (v);
     712    mpfr_clear (z);
     713  
     714    MPFR_SAVE_EXPO_FREE (expo);
     715    return mpfr_check_range (y, inexact, rnd);
     716  }
     717  
     718  #ifndef IS_GAMMA
     719  
     720  int
     721  mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
     722  {
     723    int inex;
     724  
     725    MPFR_LOG_FUNC
     726      (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
     727       ("y[%Pd]=%.*Rg inexact=%d",
     728        mpfr_get_prec (y), mpfr_log_prec, y, inex));
     729  
     730    /* special cases */
     731    if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x) ||
     732                       (MPFR_IS_NEG (x) && mpfr_integer_p (x))))
     733      {
     734        if (MPFR_IS_NAN (x))
     735          {
     736            MPFR_SET_NAN (y);
     737            MPFR_RET_NAN;
     738          }
     739        else /* lngamma(+/-Inf) = lngamma(non-positive integer) = +Inf */
     740          {
     741            if (!MPFR_IS_INF (x))
     742              MPFR_SET_DIVBY0 ();
     743            MPFR_SET_INF (y);
     744            MPFR_SET_POS (y);
     745            MPFR_RET (0);  /* exact */
     746          }
     747      }
     748  
     749    /* if -2k-1 < x < -2k <= 0, then lngamma(x) = NaN */
     750    if (MPFR_IS_NEG (x) && unit_bit (x) == 0)
     751      {
     752        MPFR_SET_NAN (y);
     753        MPFR_RET_NAN;
     754      }
     755  
     756    inex = mpfr_lngamma_aux (y, x, rnd);
     757    return inex;
     758  }
     759  
     760  int
     761  mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
     762  {
     763    int inex;
     764  
     765    MPFR_LOG_FUNC
     766      (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
     767       ("y[%Pd]=%.*Rg signp=%d inexact=%d",
     768        mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
     769  
     770    *signp = 1;  /* most common case */
     771  
     772    if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
     773      {
     774        if (MPFR_IS_NAN (x))
     775          {
     776            MPFR_SET_NAN (y);
     777            MPFR_RET_NAN;
     778          }
     779        else
     780          {
     781            if (MPFR_IS_ZERO (x))
     782              MPFR_SET_DIVBY0 ();
     783            *signp = MPFR_INT_SIGN (x);
     784            MPFR_SET_INF (y);
     785            MPFR_SET_POS (y);
     786            MPFR_RET (0);
     787          }
     788      }
     789  
     790    if (MPFR_IS_NEG (x))
     791      {
     792        if (mpfr_integer_p (x))
     793          {
     794            MPFR_SET_INF (y);
     795            MPFR_SET_POS (y);
     796            MPFR_SET_DIVBY0 ();
     797            MPFR_RET (0);
     798          }
     799  
     800        if (unit_bit (x) == 0)
     801          *signp = -1;
     802  
     803        /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
     804           thus |gamma(x)| = -1/x + euler + O(x), and
     805           log |gamma(x)| = -log(-x) - euler*x + O(x^2).
     806           More precisely we have for -0.4 <= x < 0:
     807           -log(-x) <= log |gamma(x)| <= -log(-x) - x.
     808           Since log(x) is not representable, we may have an instance of the
     809           Table Maker Dilemma. The only way to ensure correct rounding is to
     810           compute an interval [l,h] such that l <= -log(-x) and
     811           -log(-x) - x <= h, and check whether l and h round to the same number
     812           for the target precision and rounding modes. */
     813        if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
     814          /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
     815             thus |x| <= 0.25 < 0.4 */
     816          {
     817            mpfr_t l, h;
     818            int ok, inex2;
     819            mpfr_prec_t w = MPFR_PREC (y) + 14;
     820            mpfr_exp_t expl;
     821            MPFR_SAVE_EXPO_DECL (expo);
     822  
     823            MPFR_SAVE_EXPO_MARK (expo);
     824  
     825            while (1)
     826              {
     827                mpfr_init2 (l, w);
     828                mpfr_init2 (h, w);
     829                /* we want a lower bound on -log(-x), thus an upper bound
     830                   on log(-x), thus an upper bound on -x. */
     831                mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
     832                mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
     833                mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
     834                mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
     835                mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
     836                mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
     837                mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
     838                inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
     839                inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
     840                /* Caution: we not only need l = h, but both inexact flags
     841                   should agree. Indeed, one of the inexact flags might be
     842                   zero. In that case if we assume ln|gamma(x)| cannot be
     843                   exact, the other flag should be correct. We are conservative
     844                   here and request that both inexact flags agree. */
     845                ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
     846                if (ok)
     847                  mpfr_set (y, h, rnd); /* exact */
     848                else
     849                  expl = MPFR_EXP (l);
     850                mpfr_clear (l);
     851                mpfr_clear (h);
     852                if (ok)
     853                  {
     854                    MPFR_SAVE_EXPO_FREE (expo);
     855                    return mpfr_check_range (y, inex, rnd);
     856                  }
     857                /* if ulp(log(-x)) <= |x| there is no reason to loop,
     858                   since the width of [l, h] will be at least |x| */
     859                if (expl < MPFR_EXP (x) + w)
     860                  break;
     861                w += MPFR_INT_CEIL_LOG2(w) + 3;
     862              }
     863  
     864            MPFR_SAVE_EXPO_FREE (expo);
     865          }
     866      }
     867  
     868    inex = mpfr_lngamma_aux (y, x, rnd);
     869    return inex;
     870  }
     871  
     872  #endif