1  /* mpfr_log_ui -- compute natural logarithm of an unsigned long
       2  
       3  Copyright 2014-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  /* FIXME: mpfr_log_ui is much slower than mpfr_log on some values of n,
      27     e.g. about 4 times as slow for n around ULONG_MAX/3 on an
      28     x86_64 Linux machine, for 10^6 bits of precision. The reason is that
      29     for say n=6148914691236517205 and prec=10^6, the value of T computed
      30     has more than 50M bits, which is much more than needed. Indeed the
      31     binary splitting algorithm for series with a finite radius of convergence
      32     gives rationals of size n*log(n) for a target precision n. One might
      33     truncate the rationals inside the algorithm, but then the error analysis
      34     should be redone. */
      35  
      36  /* Cf https://www.ginac.de/CLN/binsplit.pdf - the Taylor series of log(1+x)
      37     up to order N for x=p/2^k is T/(B*Q).
      38     P[0] <- (-p)^(n2-n1) [with opposite sign when n1=1]
      39     q <- k*(n2-n1) [corresponding to Q[0] = 2^q]
      40     B[0] <- n1 * (n1+1) * ... * (n2-1)
      41     T[0] <- B[0]*Q[0] * S(n1,n2)
      42     where S(n1,n2) = -sum((-x)^(i-n1+1)/i, i=n1..n2-1)
      43     Assumes p is odd or zero, and -1/3 <= x = p/2^k <= 1/3.
      44  */
      45  static void
      46  S (mpz_t *P, unsigned long *q, mpz_t *B, mpz_t *T, unsigned long n1,
      47     unsigned long n2, long p, unsigned long k, int need_P)
      48  {
      49    MPFR_ASSERTD (n1 < n2);
      50    MPFR_ASSERTD (p == 0 || ((unsigned long) p & 1) != 0);
      51    if (n2 == n1 + 1)
      52      {
      53        mpz_set_si (P[0], (n1 == 1) ? p : -p);
      54        *q = k;
      55        mpz_set_ui (B[0], n1);
      56        /* T = B*Q*S where S = P/(B*Q) thus T = P */
      57        mpz_set (T[0], P[0]);
      58        /* since p is odd (or zero), there is no common factor 2 between
      59           P and Q, or T and B */
      60      }
      61    else
      62      {
      63        unsigned long m = (n1 / 2) + (n2 / 2) + (n1 & 1UL & n2), q1;
      64        /* m = floor((n1+n2)/2) */
      65  
      66        MPFR_ASSERTD (n1 < m && m < n2);
      67        S (P, q, B, T, n1, m, p, k, 1);
      68        S (P + 1, &q1, B + 1, T + 1, m, n2, p, k, need_P);
      69  
      70        /* T0 <- T0*B1*Q1 + P0*B0*T1 */
      71        mpz_mul (T[1], T[1], P[0]);
      72        mpz_mul (T[1], T[1], B[0]);
      73        mpz_mul (T[0], T[0], B[1]);
      74        /* Q[1] = 2^q1 */
      75        mpz_mul_2exp (T[0], T[0], q1); /* mpz_mul (T[0], T[0], Q[1]) */
      76        mpz_add (T[0], T[0], T[1]);
      77        if (need_P)
      78          mpz_mul (P[0], P[0], P[1]);
      79        *q += q1; /* mpz_mul (Q[0], Q[0], Q[1]) */
      80        mpz_mul (B[0], B[0], B[1]);
      81  
      82        /* there should be no common factors 2 between P, Q and T,
      83           since P is odd (or zero) */
      84      }
      85  }
      86  
      87  int
      88  mpfr_log_ui (mpfr_ptr x, unsigned long n, mpfr_rnd_t rnd_mode)
      89  {
      90    unsigned long k;
      91    mpfr_prec_t w; /* working precision */
      92    mpz_t three_n, *P, *B, *T;
      93    mpfr_t t, q;
      94    int inexact;
      95    unsigned long N, lgN, i, kk;
      96    long p;
      97    MPFR_GROUP_DECL(group);
      98    MPFR_TMP_DECL(marker);
      99    MPFR_ZIV_DECL(loop);
     100    MPFR_SAVE_EXPO_DECL (expo);
     101  
     102    if (n <= 2)
     103      {
     104        if (n == 0)
     105          {
     106            MPFR_SET_INF (x);
     107            MPFR_SET_NEG (x);
     108            MPFR_SET_DIVBY0 ();
     109            MPFR_RET (0); /* log(0) is an exact -infinity */
     110          }
     111        else if (n == 1)
     112          {
     113            MPFR_SET_ZERO (x);
     114            MPFR_SET_POS (x);
     115            MPFR_RET (0); /* only "normal" case where the result is exact */
     116          }
     117        /* now n=2 */
     118        return mpfr_const_log2 (x, rnd_mode);
     119      }
     120  
     121    /* here n >= 3 */
     122  
     123    /* Argument reduction: compute k such that 2/3 < n/2^k < 4/3,
     124       i.e., 2^(k+1) < 3n < 2^(k+2).
     125  
     126       FIXME: we could do better by considering n/(2^k*3^i*5^j),
     127       which reduces the maximal distance to 1 from 1/3 to 1/8,
     128       thus needing about 1.89 less terms in the Taylor expansion of
     129       the reduced argument. Then log(2^k*3^i*5^j) can be computed
     130       using a combination of log(16/15), log(25/24) and log(81/80),
     131       see Section 6.5 of "A Fortran Multiple-Precision Arithmetic Package",
     132       Richard P. Brent, ACM Transactions on Mathematical Software, 1978. */
     133  
     134    mpz_init_set_ui (three_n, n);
     135    mpz_mul_ui (three_n, three_n, 3);
     136    k = mpz_sizeinbase (three_n, 2) - 2;
     137    MPFR_ASSERTD (k >= 2);
     138    mpz_clear (three_n);
     139  
     140    /* The reduced argument is n/2^k - 1 = (n-2^k)/2^k.
     141       Compute p = n-2^k. One has: |p| = |n-2^k| < 2^k/3 < n/2 <= LONG_MAX,
     142       so that p and -p both fit in a long. */
     143    if (k < sizeof (unsigned long) * CHAR_BIT)  /* assume no padding bits */
     144      n -= 1UL << k;
     145    /* n is now the value of p mod ULONG_MAX+1.
     146       Since |p| <= LONG_MAX, if n > LONG_MAX, this means that p < 0 and
     147       -n as an unsigned long value is at most LONG_MAX, thus fits in a
     148       long. */
     149    p = ULONG2LONG (n);
     150  
     151    MPFR_TMP_MARK(marker);
     152    w = MPFR_PREC(x) + MPFR_INT_CEIL_LOG2 (MPFR_PREC(x)) + 10;
     153    MPFR_GROUP_INIT_2(group, w, t, q);
     154    MPFR_SAVE_EXPO_MARK (expo);
     155  
     156    kk = k;
     157    if (p != 0)
     158      while ((p % 2) == 0) /* replace p/2^kk by (p/2)/2^(kk-1) */
     159        {
     160          p /= 2;
     161          kk --;
     162        }
     163  
     164    MPFR_ZIV_INIT (loop, w);
     165    for (;;)
     166      {
     167        mpfr_t tmp;
     168        unsigned int err;
     169        unsigned long q0;
     170  
     171        /* we need at most w/log2(2^kk/|p|) terms for an accuracy of w bits */
     172        mpfr_init2 (tmp, 32);
     173        mpfr_set_ui (tmp, (p > 0) ? p : -p, MPFR_RNDU);
     174        mpfr_log2 (tmp, tmp, MPFR_RNDU);
     175        mpfr_ui_sub (tmp, kk, tmp, MPFR_RNDD);
     176        MPFR_ASSERTN (w <= ULONG_MAX);
     177        mpfr_ui_div (tmp, w, tmp, MPFR_RNDU);
     178        N = mpfr_get_ui (tmp, MPFR_RNDU);
     179        if (N < 2)
     180          N = 2;
     181        lgN = MPFR_INT_CEIL_LOG2 (N) + 1;
     182        mpfr_clear (tmp);
     183        P = (mpz_t *) MPFR_TMP_ALLOC (3 * lgN * sizeof (mpz_t));
     184        B = P + lgN;
     185        T = B + lgN;
     186        for (i = 0; i < lgN; i++)
     187          {
     188            mpz_init (P[i]);
     189            mpz_init (B[i]);
     190            mpz_init (T[i]);
     191          }
     192  
     193        S (P, &q0, B, T, 1, N, p, kk, 0);
     194        /* mpz_mul (Q[0], B[0], Q[0]); */
     195        /* mpz_mul_2exp (B[0], B[0], q0); */
     196  
     197        mpfr_set_z (t, T[0], MPFR_RNDN); /* t = P[0] * (1 + theta_1) */
     198        mpfr_set_z (q, B[0], MPFR_RNDN); /* q = B[0] * (1 + theta_2) */
     199        mpfr_mul_2ui (q, q, q0, MPFR_RNDN); /* B[0]*Q[0] */
     200        mpfr_div (t, t, q, MPFR_RNDN);   /* t = T[0]/(B[0]*Q[0])*(1 + theta_3)^3
     201                                              = log(n/2^k) * (1 + theta_4)^4
     202                                              for |theta_i| < 2^(-w) */
     203  
     204        /* argument reconstruction: add k*log(2) */
     205        mpfr_const_log2 (q, MPFR_RNDN);
     206        mpfr_mul_ui (q, q, k, MPFR_RNDN);
     207        mpfr_add (t, t, q, MPFR_RNDN);
     208        for (i = 0; i < lgN; i++)
     209          {
     210            mpz_clear (P[i]);
     211            mpz_clear (B[i]);
     212            mpz_clear (T[i]);
     213          }
     214        /* The maximal error is 5 ulps for P/Q, since |(1+/-u)^4 - 1| < 5*u
     215           for u < 2^(-12), k ulps for k*log(2), and 1 ulp for the addition,
     216           thus at most k+6 ulps.
     217           Note that there might be some cancellation in the addition: the worst
     218           case is when log(1 + p/2^kk) = log(2/3) ~ -0.405, and with n=3 which
     219           gives k=2, thus we add 2*log(2) = 1.386. Thus in the worst case we
     220           have an exponent decrease of 1, which accounts for +1 in the error. */
     221        err = MPFR_INT_CEIL_LOG2 (k + 6) + 1;
     222        if (MPFR_LIKELY (MPFR_CAN_ROUND (t, w - err, MPFR_PREC(x), rnd_mode)))
     223          break;
     224  
     225        MPFR_ZIV_NEXT (loop, w);
     226        MPFR_GROUP_REPREC_2(group, w, t, q);
     227      }
     228    MPFR_ZIV_FREE (loop);
     229  
     230    inexact = mpfr_set (x, t, rnd_mode);
     231  
     232    MPFR_GROUP_CLEAR(group);
     233    MPFR_TMP_FREE(marker);
     234  
     235    MPFR_SAVE_EXPO_FREE (expo);
     236    return mpfr_check_range (x, inexact, rnd_mode);
     237  }