1  /* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers
       2  
       3  Copyright 1999-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  /* agm(x,y) is between x and y, so we don't need to save exponent range */
      27  int
      28  mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
      29  {
      30    int compare, inexact;
      31    mp_size_t s;
      32    mpfr_prec_t p, q;
      33    mp_limb_t *up, *vp, *ufp, *vfp;
      34    mpfr_t u, v, uf, vf, sc1, sc2;
      35    mpfr_exp_t scaleop = 0, scaleit;
      36    unsigned long n; /* number of iterations */
      37    MPFR_ZIV_DECL (loop);
      38    MPFR_TMP_DECL(marker);
      39    MPFR_SAVE_EXPO_DECL (expo);
      40  
      41    MPFR_LOG_FUNC
      42      (("op2[%Pd]=%.*Rg op1[%Pd]=%.*Rg rnd=%d",
      43        mpfr_get_prec (op2), mpfr_log_prec, op2,
      44        mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
      45       ("r[%Pd]=%.*Rg inexact=%d",
      46        mpfr_get_prec (r), mpfr_log_prec, r, inexact));
      47  
      48    /* Deal with special values */
      49    if (MPFR_ARE_SINGULAR (op1, op2))
      50      {
      51        /* If a or b is NaN, the result is NaN */
      52        if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
      53          {
      54            MPFR_SET_NAN(r);
      55            MPFR_RET_NAN;
      56          }
      57        /* now one of a or b is Inf or 0 */
      58        /* If a and b is +Inf, the result is +Inf.
      59           Otherwise if a or b is -Inf or 0, the result is NaN */
      60        else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
      61          {
      62            if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
      63              {
      64                MPFR_SET_INF(r);
      65                MPFR_SET_SAME_SIGN(r, op1);
      66                MPFR_RET(0); /* exact */
      67              }
      68            else
      69              {
      70                MPFR_SET_NAN(r);
      71                MPFR_RET_NAN;
      72              }
      73          }
      74        else /* a and b are neither NaN nor Inf, and one is zero */
      75          {  /* If a or b is 0, the result is +0, in particular because the
      76                result is always >= 0 with our definition (Maple sometimes
      77                chooses a different sign for GaussAGM, but it uses another
      78                definition, with possible negative results). */
      79            MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
      80            MPFR_SET_POS (r);
      81            MPFR_SET_ZERO (r);
      82            MPFR_RET (0); /* exact */
      83          }
      84      }
      85  
      86    /* If a or b is negative (excluding -Infinity), the result is NaN */
      87    if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
      88      {
      89        MPFR_SET_NAN(r);
      90        MPFR_RET_NAN;
      91      }
      92  
      93    /* Precision of the following calculus */
      94    q = MPFR_PREC(r);
      95    p = q + MPFR_INT_CEIL_LOG2(q) + 15;
      96    MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
      97    s = MPFR_PREC2LIMBS (p);
      98  
      99    /* b (op2) and a (op1) are the 2 operands but we want b >= a */
     100    compare = mpfr_cmp (op1, op2);
     101    if (MPFR_UNLIKELY( compare == 0 ))
     102      return mpfr_set (r, op1, rnd_mode);
     103    else if (compare > 0)
     104      {
     105        mpfr_srcptr t = op1;
     106        op1 = op2;
     107        op2 = t;
     108      }
     109  
     110    /* Now b (=op2) > a (=op1) */
     111  
     112    MPFR_SAVE_EXPO_MARK (expo);
     113  
     114    MPFR_TMP_MARK(marker);
     115  
     116    /* Main loop */
     117    MPFR_ZIV_INIT (loop, p);
     118    for (;;)
     119      {
     120        mpfr_prec_t eq;
     121        unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
     122        MPFR_BLOCK_DECL (flags);
     123  
     124        /* Init temporary vars */
     125        MPFR_TMP_INIT (up, u, p, s);
     126        MPFR_TMP_INIT (vp, v, p, s);
     127        MPFR_TMP_INIT (ufp, uf, p, s);
     128        MPFR_TMP_INIT (vfp, vf, p, s);
     129  
     130        /* Calculus of un and vn */
     131      retry:
     132        MPFR_BLOCK (flags,
     133                    mpfr_mul (u, op1, op2, MPFR_RNDN);
     134                    /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
     135                    mpfr_add (v, op1, op2, MPFR_RNDN);
     136                    /* mpfr_add with !=prec is still good */);
     137        if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
     138          {
     139            mpfr_exp_t e1, e2;
     140  
     141            MPFR_ASSERTN (scaleop == 0);
     142            e1 = MPFR_GET_EXP (op1);
     143            e2 = MPFR_GET_EXP (op2);
     144  
     145            /* Let's determine scaleop to avoid an overflow/underflow. */
     146            if (MPFR_OVERFLOW (flags))
     147              {
     148                /* Let's recall that emin <= e1 <= e2 <= emax.
     149                   There has been an overflow. Thus e2 >= emax/2.
     150                   If the mpfr_mul overflowed, then e1 + e2 > emax.
     151                   If the mpfr_add overflowed, then e2 = emax.
     152                   We want: (e1 + scale) + (e2 + scale) <= emax,
     153                   i.e. scale <= (emax - e1 - e2) / 2. Let's take
     154                   scale = min(floor((emax - e1 - e2) / 2), -1).
     155                   This is OK, as:
     156                   1. emin <= scale <= -1.
     157                   2. e1 + scale >= emin. Indeed:
     158                      * If e1 + e2 > emax, then
     159                        e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
     160                                   >= (emax + e1 - emax) / 2 - 1
     161                                   >= e1 / 2 - 1 >= emin.
     162                      * Otherwise, mpfr_mul didn't overflow, therefore
     163                        mpfr_add overflowed and e2 = emax, so that
     164                        e1 > emin (see restriction below).
     165                        e1 + scale > emin - 1, thus e1 + scale >= emin.
     166                   3. e2 + scale <= emax, since scale < 0. */
     167                if (e1 + e2 > MPFR_EMAX_MAX)
     168                  {
     169                    scaleop = - (((e1 + e2) - MPFR_EMAX_MAX + 1) / 2);
     170                    MPFR_ASSERTN (scaleop < 0);
     171                  }
     172                else
     173                  {
     174                    /* The addition necessarily overflowed. */
     175                    MPFR_ASSERTN (e2 == MPFR_EMAX_MAX);
     176                    /* The case where e1 = emin and e2 = emax is not supported
     177                       here. This would mean that the precision of e2 would be
     178                       huge (and possibly not supported in practice anyway). */
     179                    MPFR_ASSERTN (e1 > MPFR_EMIN_MIN);
     180                    /* Note: this case is probably impossible to have in practice
     181                       since we need e2 = emax, and no overflow in the product.
     182                       Since the product is >= 2^(e1+e2-2), it implies
     183                       e1 + e2 - 2 <= emax, thus e1 <= 2. Now to get an overflow
     184                       we need op1 >= 1/2 ulp(op2), which implies that the
     185                       precision of op2 should be at least emax-2. On a 64-bit
     186                       computer this is impossible to have, and would require
     187                       a huge amount of memory on a 32-bit computer. */
     188                    scaleop = -1;
     189                  }
     190  
     191              }
     192            else  /* underflow only (in the multiplication) */
     193              {
     194                /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
     195                   We want: (e1 + scale) + (e2 + scale) >= emin + 1,
     196                   i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
     197                   scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
     198                   1. 1 <= scale <= emax.
     199                   2. e1 + scale >= emin + 1 >= emin.
     200                   3. e2 + scale <= scale <= emax. */
     201                MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
     202                scaleop = (MPFR_EMIN_MIN + 2 - e1 - e2) / 2;
     203                MPFR_ASSERTN (scaleop > 0);
     204              }
     205  
     206            MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
     207            MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
     208            op1 = sc1;
     209            op2 = sc2;
     210            MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
     211                           MPFR_EXP_FSPEC "d\n", scaleop));
     212            goto retry;
     213          }
     214  
     215        MPFR_CLEAR_FLAGS ();
     216        mpfr_sqrt (u, u, MPFR_RNDN);
     217        mpfr_div_2ui (v, v, 1, MPFR_RNDN);
     218  
     219        scaleit = 0;
     220        n = 1;
     221        while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
     222          {
     223            MPFR_BLOCK_DECL (flags2);
     224  
     225            MPFR_LOG_MSG (("Iteration n = %lu\n", n));
     226  
     227          retry2:
     228            mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
     229            mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
     230            /* See proof in algorithms.tex */
     231            if (eq > p / 4)
     232              {
     233                mpfr_t w;
     234                MPFR_BLOCK_DECL (flags3);
     235  
     236                MPFR_LOG_MSG (("4*eq > p\n", 0));
     237  
     238                /* vf = V(k) */
     239                mpfr_init2 (w, (p + 1) / 2);
     240                MPFR_BLOCK
     241                  (flags3,
     242                   mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
     243                   mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
     244                   mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
     245                   mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
     246                   );
     247                if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
     248                  {
     249                    mpfr_sub (v, vf, w, MPFR_RNDN);
     250                    err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
     251                    mpfr_clear (w);
     252                    break;
     253                  }
     254                /* There has been an underflow because of the cancellation
     255                   between V(k-1) and U(k-1). Let's use the conventional
     256                   method. */
     257                MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
     258                mpfr_clear (w);
     259                MPFR_CLEAR_UNDERFLOW ();
     260              }
     261            /* U(k) increases, so that U.V can overflow (but not underflow). */
     262            MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
     263            if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
     264              {
     265                mpfr_exp_t scale2;
     266  
     267                scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
     268                             - MPFR_EMAX_MAX + 1) / 2);
     269                MPFR_EXP (u) += scale2;
     270                MPFR_EXP (v) += scale2;
     271                scaleit += scale2;
     272                MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
     273                               MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
     274                               n, scaleit, scale2));
     275                MPFR_CLEAR_OVERFLOW ();
     276                goto retry2;
     277              }
     278            mpfr_sqrt (u, uf, MPFR_RNDN);
     279            mpfr_swap (v, vf);
     280            n ++;
     281          }
     282  
     283        MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));
     284  
     285        /* the error on v is bounded by (18n+51) ulps, or twice if there
     286           was an exponent loss in the final subtraction */
     287        err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
     288                                                   since n is about log(p) */
     289        /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
     290        if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
     291                         MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
     292          break; /* Stop the loop */
     293  
     294        /* Next iteration */
     295        MPFR_ZIV_NEXT (loop, p);
     296        s = MPFR_PREC2LIMBS (p);
     297      }
     298    MPFR_ZIV_FREE (loop);
     299  
     300    if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
     301                       != 0))
     302      {
     303        MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
     304        MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
     305        MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
     306        MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
     307      }
     308  
     309    /* Setting of the result */
     310    inexact = mpfr_set (r, v, rnd_mode);
     311    MPFR_EXP (r) -= scaleop + scaleit;
     312  
     313    /* Let's clean */
     314    MPFR_TMP_FREE(marker);
     315  
     316    MPFR_SAVE_EXPO_FREE (expo);
     317    /* From the definition of the AGM, underflow and overflow
     318       are not possible. */
     319    return mpfr_check_range (r, inexact, rnd_mode);
     320    /* agm(u,v) can be exact for u, v rational only for u=v.
     321       Proof (due to Nicolas Brisebarre): it suffices to consider
     322       u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
     323       and a theorem due to G.V. Chudnovsky states that for x a
     324       non-zero algebraic number with |x|<1, then
     325       2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
     326       independent over Q. */
     327  }