1  /* mpfr_exp_2 -- exponential of a floating-point number
       2                   using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
       3  
       4  Copyright 1999-2023 Free Software Foundation, Inc.
       5  Contributed by the AriC and Caramba projects, INRIA.
       6  
       7  This file is part of the GNU MPFR Library.
       8  
       9  The GNU MPFR Library is free software; you can redistribute it and/or modify
      10  it under the terms of the GNU Lesser General Public License as published by
      11  the Free Software Foundation; either version 3 of the License, or (at your
      12  option) any later version.
      13  
      14  The GNU MPFR Library is distributed in the hope that it will be useful, but
      15  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      16  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      17  License for more details.
      18  
      19  You should have received a copy of the GNU Lesser General Public License
      20  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      21  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      22  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      23  
      24  #define MPFR_NEED_LONGLONG_H  /* MPFR_INT_CEIL_LOG2 */
      25  #include "mpfr-impl.h"
      26  
      27  static unsigned long
      28  mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
      29  static unsigned long
      30  mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *);
      31  static mpfr_exp_t
      32  mpz_normalize  (mpz_t, mpz_t, mpfr_exp_t);
      33  static mpfr_exp_t
      34  mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t);
      35  
      36  /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
      37     Otherwise do nothing and return 0.
      38   */
      39  static mpfr_exp_t
      40  mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q)
      41  {
      42    size_t k;
      43  
      44    MPFR_MPZ_SIZEINBASE2 (k, z);
      45    MPFR_ASSERTD (k == (mpfr_uexp_t) k);
      46    if (MPFR_LIKELY(q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q))
      47      {
      48        mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
      49        return (mpfr_exp_t) k - q;
      50      }
      51    mpz_set (rop, z);
      52    return 0;
      53  }
      54  
      55  /* if expz > target, shift z by (expz-target) bits to the left.
      56     if expz < target, shift z by (target-expz) bits to the right.
      57     Returns target.
      58  */
      59  static mpfr_exp_t
      60  mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target)
      61  {
      62    if (MPFR_LIKELY(target > expz))
      63      mpz_fdiv_q_2exp (rop, z, target - expz);
      64    else
      65      mpz_mul_2exp (rop, z, expz - target);
      66    return target;
      67  }
      68  
      69  /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
      70     where x = n*log(2)+(2^K)*r
      71     together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
      72     evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
      73     This function returns with the exact flags due to exp.
      74  */
      75  int
      76  mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
      77  {
      78    long n;
      79    unsigned long K, k, l, err; /* FIXME: Which type ? */
      80    int error_r;
      81    mpfr_exp_t exps, expx;
      82    mpfr_prec_t q, precy;
      83    int inexact;
      84    mpfr_t s, r;
      85    mpz_t ss;
      86    MPFR_GROUP_DECL(group);
      87    MPFR_ZIV_DECL (loop);
      88  
      89    MPFR_LOG_FUNC
      90      (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
      91       ("y[%Pd]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
      92        inexact));
      93  
      94    expx = MPFR_GET_EXP (x);
      95    precy = MPFR_PREC(y);
      96  
      97    /* First perform argument reduction: if x' = x - n*log(2) we have
      98       exp(x) = exp(x)*2^n. We should take n near from x/log(2) but it does not
      99       need to be exact.
     100       Warning: we cannot use the 'double' type here, since on 64-bit machines
     101       x may be as large as 2^62*log(2) without overflow, and then x/log(2)
     102       is about 2^62: not every integer of that size can be represented as a
     103       'double', thus the argument reduction would fail. */
     104    if (MPFR_UNLIKELY(expx <= -2))
     105      /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
     106      n = 0;
     107    else
     108      {
     109        mp_limb_t r_limb[(sizeof (long) -1) / sizeof(mp_limb_t) + 1];
     110        /* Note: we use precision sizeof (long) * CHAR_BIT - 1 here since it is
     111           more efficient that full limb precision.
     112           The value of n will depend on whether MPFR_LONG_WITHIN_LIMB is
     113           defined or not. For instance, for r = 0.111E0, one gets n = 0
     114           in the former case and n = 1 in the latter case. */
     115        MPFR_TMP_INIT1(r_limb, r, sizeof (long) * CHAR_BIT - 1);
     116        mpfr_div (r, x, __gmpfr_const_log2_RNDD, MPFR_RNDN);
     117  #ifdef MPFR_LONG_WITHIN_LIMB
     118        /* The following code assume an unsigned long can fit in a mp_limb_t */
     119        {
     120          mp_limb_t a;
     121          mpfr_exp_t exp;
     122          /* Read the long directly (faster than using mpfr_get_si
     123             since it fits, it is not singular, it can't be zero
     124             and there is no conversion to do) */
     125          MPFR_ASSERTD (MPFR_NOTZERO (r));
     126          exp = MPFR_GET_EXP (r);
     127          MPFR_ASSERTD (exp <= GMP_NUMB_BITS);
     128          if (exp >= 1)
     129            {
     130              a = MPFR_MANT(r)[0] >> (GMP_NUMB_BITS - exp);
     131              n = MPFR_IS_POS (r) ? a : a <= LONG_MAX ? - (long) a : LONG_MIN;
     132            }
     133          else
     134            n = 0;
     135        }
     136  #else
     137        /* Use generic way to get the long */
     138        n = mpfr_get_si (r, MPFR_RNDN);
     139  #endif
     140      }
     141    /* we have |x| <= (|n|+1)*log(2) */
     142    MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
     143  
     144    /* error_r bounds the cancelled bits in x - n*log(2) */
     145    if (MPFR_UNLIKELY (n == 0))
     146      error_r = 0;
     147    else
     148      {
     149        error_r = mpfr_nbits_ulong (SAFE_ABS (unsigned long, n) + 1);
     150        /* we have |x| <= 2^error_r * log(2) */
     151      }
     152  
     153    /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
     154       n/K terms costs about n/(2K) multiplications when computed in fixed
     155       point */
     156    K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2) + 3
     157      : __gmpfr_cuberoot (4*precy);
     158    l = (precy - 1) / K + 1;
     159    err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
     160    /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
     161    q = precy + err + K + 10;
     162    /* if |x| >> 1, take into account the cancelled bits */
     163    if (expx > 0)
     164      q += expx;
     165  
     166    /* Even with to the mpfr_prec_round below, it is possible to use
     167       the MPFR_GROUP_* macros here because mpfr_prec_round is only
     168       called in a special case. */
     169    MPFR_GROUP_INIT_2(group, q + error_r, r, s);
     170    mpz_init (ss);
     171  
     172    /* the algorithm consists in computing an upper bound of exp(x) using
     173       a precision of q bits, and see if we can round to MPFR_PREC(y) taking
     174       into account the maximal error. Otherwise we increase q. */
     175    MPFR_ZIV_INIT (loop, q);
     176    for (;;)
     177      {
     178        MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
     179                       n, K, l, (unsigned long) q, error_r));
     180  
     181        /* First reduce the argument to r = x - n * log(2),
     182           so that r is small in absolute value. We want an upper
     183           bound on r to get an upper bound on exp(x). */
     184  
     185        /* if n<0, we have to get an upper bound of log(2)
     186           in order to get an upper bound of r = x-n*log(2) */
     187        mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
     188        /* s is within 1 ulp(s) of log(2) */
     189  
     190        mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU);
     191        /* r is within 3 ulps of |n|*log(2) */
     192        if (n < 0)
     193          MPFR_CHANGE_SIGN (r);
     194        /* r <= n*log(2), within 3 ulps */
     195  
     196        MPFR_LOG_VAR (x);
     197        MPFR_LOG_VAR (r);
     198  
     199        mpfr_sub (r, x, r, MPFR_RNDU);
     200  
     201        while (MPFR_IS_PURE_FP(r) && MPFR_IS_NEG (r))
     202          { /* initial approximation n was too large */
     203            n--;
     204            mpfr_add (r, r, s, MPFR_RNDU);
     205          }
     206  
     207        /* if r is 0, we cannot round correctly */
     208        if (MPFR_LIKELY(MPFR_IS_PURE_FP (r)))
     209          {
     210            /* since there was a cancellation in x - n*log(2), the low error_r
     211               bits from r are zero and thus non significant, thus we can reduce
     212               the working precision */
     213            if (MPFR_LIKELY(error_r > 0))
     214              {
     215                MPFR_ASSERTD( MPFR_PREC(r) > q);
     216                /* since MPFR_PREC(r) > q, there is no reallocation to do,
     217                   and it is safe to use it with MPFR_GROUP functions */
     218                mpfr_prec_round (r, q, MPFR_RNDU);
     219              }
     220            /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
     221               and 1 + 3/2 if error_r > 0) */
     222            MPFR_LOG_VAR (r);
     223            MPFR_ASSERTD (MPFR_IS_POS (r));
     224            mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */
     225  
     226            /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
     227            MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
     228            l = (precy < MPFR_EXP_2_THRESHOLD)
     229              ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
     230              : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
     231  
     232            MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
     233                           l, (unsigned long) q, (K + l) * (double) q * q));
     234  
     235            for (k = 0; k < K; k++)
     236              {
     237                mpz_mul (ss, ss, ss);
     238                exps *= 2;
     239                exps += mpz_normalize (ss, ss, q);
     240              }
     241            mpfr_set_z_2exp (s, ss, exps, MPFR_RNDN);
     242  
     243            /* error is at most 2^K*l, plus 2 to take into account of
     244               the error of 3 ulps on r */
     245            err = K + MPFR_INT_CEIL_LOG2 (l) + 2;
     246  
     247            MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
     248            MPFR_LOG_VAR (s);
     249            MPFR_LOG_MSG (("err=%lu bits\n", K));
     250  
     251            if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode)))
     252              {
     253                MPFR_CLEAR_FLAGS ();
     254                inexact = mpfr_mul_2si (y, s, n, rnd_mode);
     255                break;
     256              }
     257          }
     258        MPFR_ZIV_NEXT (loop, q);
     259        MPFR_GROUP_REPREC_2(group, q+error_r, r, s);
     260      }
     261    MPFR_ZIV_FREE (loop);
     262    mpz_clear (ss);
     263    MPFR_GROUP_CLEAR (group);
     264  
     265    return inexact;
     266  }
     267  
     268  /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
     269     using naive method with O(l) multiplications.
     270     Return the number of iterations l.
     271     The absolute error on s is less than 3*l*(l+1)*2^(-q).
     272     Version using fixed-point arithmetic with mpz instead
     273     of mpfr for internal computations.
     274  */
     275  static unsigned long
     276  mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
     277  {
     278    unsigned long l;
     279    mpfr_exp_t dif, expt, expr;
     280    mpz_t t, rr;
     281    mp_size_t sbit, tbit;
     282  
     283    MPFR_ASSERTD (MPFR_IS_PURE_FP (r));
     284  
     285    expt = 0;
     286    *exps = 1 - (mpfr_exp_t) q;                   /* s = 2^(q-1) */
     287    mpz_init (t);
     288    mpz_init (rr);
     289    mpz_set_ui (t, 1);
     290    mpz_set_ui (s, 1);
     291    mpz_mul_2exp (s, s, q-1);
     292    expr = mpfr_get_z_2exp (rr, r);               /* no error here */
     293  
     294    l = 0;
     295    for (;;)
     296      {
     297        l++;
     298        mpz_mul(t, t, rr);
     299        expt += expr;
     300        MPFR_MPZ_SIZEINBASE2 (sbit, s);
     301        MPFR_MPZ_SIZEINBASE2 (tbit, t);
     302        dif = *exps + sbit - expt - tbit;
     303        /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
     304        expt += mpz_normalize (t, t, (mpfr_exp_t) q - dif);
     305        /* error at most 2^(1-q) */
     306        if (l > 1)
     307          {
     308            /* GMP doesn't optimize the case of power of 2 */
     309            if (IS_POW2(l))
     310              {
     311                int bits = MPFR_INT_CEIL_LOG2(l);
     312                mpz_fdiv_q_2exp (t, t, bits);     /* error at most 2^(1-q) */
     313              }
     314            else
     315              {
     316                mpz_fdiv_q_ui (t, t, l);          /* error at most 2^(1-q) */
     317              }
     318            /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
     319            MPFR_ASSERTD (expt == *exps);
     320          }
     321        if (mpz_sgn (t) == 0)
     322          break;
     323        mpz_add (s, s, t);                        /* no error here: exact */
     324        /* ensures rr has the same size as t: after several shifts, the error
     325           on rr is still at most ulp(t)=ulp(s) */
     326        MPFR_MPZ_SIZEINBASE2 (tbit, t);
     327        expr += mpz_normalize (rr, rr, tbit);
     328      }
     329  
     330    mpz_clear (t);
     331    mpz_clear (rr);
     332  
     333    return 3 * l * (l + 1);
     334  }
     335  
     336  /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
     337     using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
     338     Return l.
     339     Uses m multiplications of full size and 2l/m of decreasing size,
     340     i.e. a total equivalent to about m+l/m full multiplications,
     341     i.e. 2*sqrt(l) for m=sqrt(l).
     342     NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
     343     is no longer used (r6919); sizer was the number of limbs of r.
     344     Version using mpz. ss must have at least (sizer+1) limbs.
     345     The error is bounded by (l^2+4*l) ulps where l is the return value.
     346  */
     347  static unsigned long
     348  mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps)
     349  {
     350    mpfr_exp_t expr, *expR, expt;
     351    mpfr_prec_t ql;
     352    unsigned long l, m, i;
     353    mpz_t t, *R, rr, tmp;
     354    mp_size_t sbit, rrbit;
     355    MPFR_TMP_DECL(marker);
     356  
     357    /* estimate value of l */
     358    MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
     359    l = q / (- MPFR_GET_EXP (r));
     360    m = __gmpfr_isqrt (l);
     361    /* we access R[2], thus we need m >= 2 */
     362    if (m < 2)
     363      m = 2;
     364  
     365    MPFR_TMP_MARK(marker);
     366    R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t));     /* R[i] is r^i */
     367    expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t));
     368    /* expR[i] is the exponent for R[i] */
     369    mpz_init (tmp);
     370    mpz_init (rr);
     371    mpz_init (t);
     372    mpz_set_ui (s, 0);
     373    *exps = 1 - q;                        /* 1 ulp = 2^(1-q) */
     374    for (i = 0 ; i <= m ; i++)
     375      mpz_init (R[i]);
     376    expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */
     377    expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */
     378    mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */
     379    mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */
     380    expR[2] = 1 - q;
     381    for (i = 3 ; i <= m ; i++)
     382      {
     383        if ((i & 1) == 1)
     384          mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
     385        else
     386          mpz_mul (t, R[i/2], R[i/2]);
     387        mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */
     388        expR[i] = 1 - q;
     389      }
     390    mpz_set_ui (R[0], 1);
     391    mpz_mul_2exp (R[0], R[0], q-1);
     392    expR[0] = 1-q; /* R[0]=1 */
     393    mpz_set_ui (rr, 1);
     394    expr = 0; /* rr contains r^l/l! */
     395    /* by induction: err(rr) <= 2*l ulps */
     396  
     397    l = 0;
     398    ql = q; /* precision used for current giant step */
     399    do
     400      {
     401        /* all R[i] must have exponent 1-ql */
     402        if (l != 0)
     403          for (i = 0 ; i < m ; i++)
     404            expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql);
     405        /* the absolute error on R[i]*rr is still 2*i-1 ulps */
     406        expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql);
     407        /* err(t) <= 2*m-1 ulps */
     408        /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
     409           using Horner's scheme */
     410        for (i = m-1 ; i-- != 0 ; )
     411          {
     412            mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */
     413            mpz_add (t, t, R[i]);
     414          }
     415        /* now err(t) <= (3m-2) ulps */
     416  
     417        /* now multiplies t by r^l/l! and adds to s */
     418        mpz_mul (t, t, rr);
     419        expt += expr;
     420        expt = mpz_normalize2 (t, t, expt, *exps);
     421        /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
     422        MPFR_ASSERTD (expt == *exps);
     423        mpz_add (s, s, t); /* no error here */
     424  
     425        /* updates rr, the multiplication of the factors l+i could be done
     426           using binary splitting too, but it is not sure it would save much */
     427        mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
     428        expr += expR[m];
     429        mpz_set_ui (tmp, 1);
     430        for (i = 1 ; i <= m ; i++)
     431          mpz_mul_ui (tmp, tmp, l + i);
     432        mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
     433        l += m;
     434        if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
     435          break;
     436        expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
     437        if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
     438          rrbit = 1;
     439        else
     440          MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
     441        MPFR_MPZ_SIZEINBASE2 (sbit, s);
     442        ql = q - *exps - sbit + expr + rrbit;
     443        /* TODO: Wrong cast. I don't want what is right, but this is
     444           certainly wrong */
     445      }
     446    while ((size_t) expr + rrbit > (size_t) -q);
     447  
     448    for (i = 0 ; i <= m ; i++)
     449      mpz_clear (R[i]);
     450    MPFR_TMP_FREE(marker);
     451    mpz_clear (rr);
     452    mpz_clear (t);
     453    mpz_clear (tmp);
     454  
     455    return l * (l + 4);
     456  }