1  /* mpfr_sin_cos -- sine and cosine of a floating-point number
       2  
       3  Copyright 2002-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #define MPFR_NEED_LONGLONG_H
      24  #include "mpfr-impl.h"
      25  
      26  /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
      27     ie, iff x = 0 */
      28  int
      29  mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
      30  {
      31    mpfr_prec_t prec, m;
      32    int neg, reduce;
      33    mpfr_t c, xr;
      34    mpfr_srcptr xx;
      35    mpfr_exp_t err, expx;
      36    int inexy, inexz;
      37    MPFR_ZIV_DECL (loop);
      38    MPFR_SAVE_EXPO_DECL (expo);
      39  
      40    MPFR_ASSERTN (y != z);
      41  
      42    if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
      43      {
      44        if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
      45          {
      46            MPFR_SET_NAN (y);
      47            MPFR_SET_NAN (z);
      48            MPFR_RET_NAN;
      49          }
      50        else /* x is zero */
      51          {
      52            MPFR_ASSERTD (MPFR_IS_ZERO (x));
      53            MPFR_SET_ZERO (y);
      54            MPFR_SET_SAME_SIGN (y, x);
      55            /* y = 0, thus exact, but z is inexact in case of underflow
      56               or overflow */
      57            inexy = 0; /* y is exact */
      58            inexz = mpfr_set_ui (z, 1, rnd_mode);
      59            return INEX(inexy,inexz);
      60          }
      61      }
      62  
      63    MPFR_LOG_FUNC
      64      (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
      65       ("sin[%Pd]=%.*Rg cos[%Pd]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
      66        mpfr_get_prec (z), mpfr_log_prec, z));
      67  
      68    MPFR_SAVE_EXPO_MARK (expo);
      69  
      70    prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
      71    m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
      72    expx = MPFR_GET_EXP (x);
      73  
      74    /* When x is close to 0, say 2^(-k), then there is a cancellation of about
      75       2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
      76       to compute sin(x) directly. VL: This is partly done by using
      77       MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
      78       functions. Moreover, any overflow on m is avoided. */
      79    if (expx < 0)
      80      {
      81        /* Warning: in case y = x, and the first call to
      82           MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
      83           we will have clobbered the original value of x.
      84           The workaround is to first compute z = cos(x) in that case, since
      85           y and z are different. */
      86        if (y != x)
      87          /* y and x differ, thus we can safely try to compute y first */
      88          {
      89            MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
      90              y, x, -2 * expx, 2, 0, rnd_mode,
      91              { inexy = _inexact;
      92                goto small_input; });
      93            if (0)
      94              {
      95              small_input:
      96                /* we can go here only if we can round sin(x) */
      97                MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
      98                  z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
      99                  { inexz = _inexact;
     100                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
     101                    goto end; });
     102              }
     103  
     104            /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
     105               calls failed */
     106          }
     107        else /* y and x are the same variable: try to compute z first, which
     108                necessarily differs */
     109          {
     110            MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
     111              z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
     112              { inexz = _inexact;
     113                goto small_input2; });
     114            if (0)
     115              {
     116              small_input2:
     117                /* we can go here only if we can round cos(x) */
     118                MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
     119                  y, x, -2 * expx, 2, 0, rnd_mode,
     120                  { inexy = _inexact;
     121                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
     122                    goto end; });
     123              }
     124          }
     125        m += 2 * (-expx);
     126      }
     127  
     128    if (prec >= MPFR_SINCOS_THRESHOLD)
     129      {
     130        MPFR_SAVE_EXPO_FREE (expo);
     131        return mpfr_sincos_fast (y, z, x, rnd_mode);
     132      }
     133  
     134    mpfr_init2 (c, m);
     135    mpfr_init2 (xr, m);
     136  
     137    MPFR_ZIV_INIT (loop, m);
     138    for (;;)
     139      {
     140        /* the following is copied from sin.c */
     141        if (expx >= 2) /* reduce the argument */
     142          {
     143            reduce = 1;
     144            MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
     145            mpfr_set_prec (c, expx + m - 1);
     146            mpfr_set_prec (xr, m);
     147            mpfr_const_pi (c, MPFR_RNDN);
     148            mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
     149            mpfr_remainder (xr, x, c, MPFR_RNDN);
     150            mpfr_div_2ui (c, c, 1, MPFR_RNDN);
     151            if (MPFR_IS_POS (xr))
     152              mpfr_sub (c, c, xr, MPFR_RNDZ);
     153            else
     154              mpfr_add (c, c, xr, MPFR_RNDZ);
     155            if (MPFR_IS_ZERO(xr)
     156                || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
     157                || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
     158              goto next_step;
     159            xx = xr;
     160          }
     161        else /* the input argument is already reduced */
     162          {
     163            reduce = 0;
     164            xx = x;
     165          }
     166  
     167        neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
     168        mpfr_set_prec (c, m);
     169        mpfr_cos (c, xx, MPFR_RNDZ);
     170        /* If no argument reduction was performed, the error is at most ulp(c),
     171           otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
     172           ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
     173           case. */
     174        if (reduce == 0)
     175          err = m;
     176        else
     177          err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
     178        if (!MPFR_CAN_ROUND (c, err, MPFR_PREC (z), rnd_mode))
     179          goto next_step;
     180  
     181        /* We can't set z now, because in case z = x, and the MPFR_CAN_ROUND()
     182           call below fails, we will have clobbered the input.
     183           Note: m below is the precision of c; see above. */
     184        mpfr_set_prec (xr, m);
     185        mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
     186        mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
     187                                        2^(5-m) if reduce=1, and by 2^(2-m)
     188                                        otherwise */
     189        mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
     190                                             is 1, and 2^(3-m) otherwise */
     191        mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
     192                                        2^(6-m-Exp(c)) if reduce=1, and
     193                                        2^(3-m-Exp(c)) otherwise */
     194        err = 3 + 3 * reduce - MPFR_GET_EXP (c);
     195        if (neg)
     196          MPFR_CHANGE_SIGN (c);
     197  
     198        /* the absolute error on c is at most 2^(err-m), which we must put
     199           in the form 2^(EXP(c)-err). */
     200        err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
     201        if (MPFR_CAN_ROUND (c, err, MPFR_PREC (y), rnd_mode))
     202          break;
     203        /* check for huge cancellation */
     204        if (err < (mpfr_exp_t) MPFR_PREC (y))
     205          m += MPFR_PREC (y) - err;
     206        /* Check if near 1 */
     207        if (MPFR_GET_EXP (c) == 1
     208            && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
     209          m += m;
     210  
     211      next_step:
     212        MPFR_ZIV_NEXT (loop, m);
     213        mpfr_set_prec (c, m);
     214      }
     215    MPFR_ZIV_FREE (loop);
     216  
     217    inexy = mpfr_set (y, c, rnd_mode);
     218    inexz = mpfr_set (z, xr, rnd_mode);
     219  
     220    mpfr_clear (c);
     221    mpfr_clear (xr);
     222  
     223   end:
     224    MPFR_SAVE_EXPO_FREE (expo);
     225    /* FIXME: add a test for bug before revision 7355 */
     226    inexy = mpfr_check_range (y, inexy, rnd_mode);
     227    inexz = mpfr_check_range (z, inexz, rnd_mode);
     228    MPFR_RET (INEX(inexy,inexz));
     229  }
     230  
     231  /*************** asymptotically fast implementation below ********************/
     232  
     233  /* truncate Q from R to at most prec bits.
     234     Return the number of truncated bits.
     235   */
     236  static mpfr_prec_t
     237  reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
     238  {
     239    mpfr_prec_t l;
     240  
     241    MPFR_MPZ_SIZEINBASE2(l, R);
     242    l = (l > prec) ? l - prec : 0;
     243    mpz_fdiv_q_2exp (Q, R, l);
     244    return l;
     245  }
     246  
     247  /* truncate S and C so that the smaller has prec bits.
     248     Return the number of truncated bits.
     249   */
     250  static unsigned long
     251  reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
     252  {
     253    unsigned long ls;
     254    unsigned long lc;
     255    unsigned long l;
     256  
     257    MPFR_MPZ_SIZEINBASE2(ls, S);
     258    MPFR_MPZ_SIZEINBASE2(lc, C);
     259  
     260    l = (ls < lc) ? ls : lc; /* smaller length */
     261    l = (l > prec) ? l - prec : 0;
     262    mpz_fdiv_q_2exp (S, S, l);
     263    mpz_fdiv_q_2exp (C, C, l);
     264    return l;
     265  }
     266  
     267  /* return in S0/Q0 a rational approximation of sin(X) with absolute error
     268                       bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
     269     and in    C0/Q0 a rational approximation of cos(X), with relative error
     270                       bounded by 9*2^(-prec) (and also absolute error, since
     271                       |cos(X)| <= 1).
     272     We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
     273     We use the following binary splitting formula:
     274     P(a,b) = (-p)^(b-a)
     275     Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
     276     T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
     277  
     278     Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
     279     We do not store the factor 2^r in Q().
     280  
     281     Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
     282  
     283     Return l such that Q0 has to be multiplied by 2^l.
     284  
     285     Assumes prec >= 10.
     286  */
     287  
     288  #define KMAX 64
     289  static unsigned long
     290  sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
     291              mpfr_prec_t prec)
     292  {
     293    mpz_t T[KMAX], Q[KMAX], ptoj[KMAX], pp;
     294    mpfr_prec_t log2_nb_terms[KMAX], mult[KMAX];
     295    mpfr_prec_t accu[KMAX], size_ptoj[KMAX];
     296    mpfr_prec_t prec_i_have, h, r0 = r, pp_s, p_s;
     297    unsigned long i, j, m;
     298    int alloc, k, l;
     299  
     300    if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
     301      {
     302        mpz_set_ui (Q0, 1);
     303        mpz_set_ui (S0, 1);
     304        mpz_set_ui (C0, 1);
     305        return 0;
     306      }
     307  
     308    /* check that X=p/2^r <= 1/2 */
     309    MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
     310  
     311    mpz_init (pp);
     312  
     313    /* normalize p (non-zero here) */
     314    h = mpz_scan1 (p, 0);
     315    mpz_fdiv_q_2exp (pp, p, h); /* p = pp * 2^h */
     316    mpz_mul (pp, pp, pp);
     317    r = 2 * (r - h);            /* x^2 = (p/2^r0)^2 = pp / 2^r */
     318  
     319    /* now p is odd */
     320    alloc = 2;
     321    mpz_init_set_ui (T[0], 6);
     322    mpz_init_set_ui (Q[0], 6);
     323    mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
     324    mpz_init (T[1]);
     325    mpz_init (Q[1]);
     326    mpz_init (ptoj[1]);
     327    mpz_mul (ptoj[1], pp, pp);  /* ptoj[1] = pp^2 */
     328    MPFR_MPZ_SIZEINBASE2(size_ptoj[1], ptoj[1]);
     329  
     330    mpz_mul_2exp (T[0], T[0], r);
     331    mpz_sub (T[0], T[0], pp);      /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
     332    log2_nb_terms[0] = 1;
     333  
     334    /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
     335    MPFR_MPZ_SIZEINBASE2(pp_s, pp);
     336    MPFR_MPZ_SIZEINBASE2(p_s, p);
     337    mult[0] = r - pp_s + r0 - p_s;
     338    /* we have x^3 < 1/2^mult[0] */
     339  
     340    for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
     341      {
     342        /* i is even here */
     343        /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
     344           we have already summed terms of index < i
     345           in S[0]/Q[0], ..., S[k]/Q[k] */
     346        k ++;
     347        if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
     348          {
     349            MPFR_ASSERTD (k + 1 == alloc);
     350            alloc ++;
     351            MPFR_ASSERTN (k + 1 < KMAX);
     352            mpz_init (T[k+1]);
     353            mpz_init (Q[k+1]);
     354            mpz_init (ptoj[k+1]);
     355            mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
     356            MPFR_MPZ_SIZEINBASE2(size_ptoj[k+1], ptoj[k+1]);
     357          }
     358        /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
     359           then                Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
     360           which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
     361           Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
     362        MPFR_ASSERTN (k < KMAX);
     363        log2_nb_terms[k] = 1;
     364        mpz_set_ui (Q[k], 2 * i + 2);
     365        mpz_mul_ui (Q[k], Q[k], 2 * i + 3);
     366        mpz_mul_2exp (T[k], Q[k], r);
     367        mpz_sub (T[k], T[k], pp);
     368        mpz_mul_ui (Q[k], Q[k], 2 * i);
     369        mpz_mul_ui (Q[k], Q[k], 2 * i + 1);
     370        /* the next term of the series is divided by Q[k] and multiplied
     371           by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
     372        MPFR_MPZ_SIZEINBASE2(mult[k], Q[k]);
     373        mult[k] += 2 * r - size_ptoj[1] - 1;
     374        /* the absolute contribution of the next term is 1/2^accu[k] */
     375        accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
     376        prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
     377        j = (i + 2) / 2;
     378        l = 1;
     379        while ((j & 1) == 0) /* combine and reduce */
     380          {
     381            MPFR_ASSERTN (k >= 1);
     382            mpz_mul (T[k], T[k], ptoj[l]);
     383            mpz_mul (T[k-1], T[k-1], Q[k]);
     384            mpz_mul_2exp (T[k-1], T[k-1], r << l);
     385            mpz_add (T[k-1], T[k-1], T[k]);
     386            mpz_mul (Q[k-1], Q[k-1], Q[k]);
     387            log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
     388                                      is a power of 2 by construction */
     389            MPFR_MPZ_SIZEINBASE2(prec_i_have, Q[k]);
     390            mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
     391            accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
     392            prec_i_have = accu[k-1];
     393            l ++;
     394            j >>= 1;
     395            k --;
     396          }
     397      }
     398  
     399    /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
     400       here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
     401    h = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
     402    while (k > 0)
     403      {
     404        mpz_mul (T[k], T[k], ptoj[log2_nb_terms[k-1]]);
     405        mpz_mul (T[k-1], T[k-1], Q[k]);
     406        h += (mpfr_prec_t) 1 << log2_nb_terms[k];
     407        mpz_mul_2exp (T[k-1], T[k-1], r * h);
     408        mpz_add (T[k-1], T[k-1], T[k]);
     409        mpz_mul (Q[k-1], Q[k-1], Q[k]);
     410        k--;
     411      }
     412  
     413    m = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
     414    /* at this point T[0]/(2^m*Q[0]) is an approximation of sin(x) where the 1st
     415       neglected term has contribution < 1/2^prec, thus since the series has
     416       alternate signs, the error is < 1/2^prec */
     417  
     418    /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
     419       which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
     420       [up to a power of two] */
     421    m += reduce (Q0, Q[0], prec);
     422    m -= reduce (T[0], T[0], prec);
     423    /* multiply by x = p/2^m */
     424    mpz_mul (S0, T[0], p);
     425    m -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
     426    /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
     427                |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
     428       |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
     429  
     430    mpz_clear (pp);
     431    for (k = 0; k < alloc; k ++)
     432      {
     433        mpz_clear (T[k]);
     434        mpz_clear (Q[k]);
     435        mpz_clear (ptoj[k]);
     436      }
     437  
     438    /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
     439       = sqrt(Q0^2*2^(2m)-S0^2)/Q0.
     440       Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
     441       then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
     442                          = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
     443                          = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
     444                            [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
     445  
     446                          Since we truncate the square root, we get:
     447                            sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
     448                          = Q*sqrt(cos(X)^2-eps1)+eps2
     449                          = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
     450                          = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
     451                          = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
     452                            since |Q| >= 2^(prec-1) */
     453    /* we assume that Q0*2^m >= 2^(prec-1) */
     454    MPFR_ASSERTN(m + mpz_sizeinbase (Q0, 2) >= prec);
     455    mpz_mul (C0, Q0, Q0);
     456    mpz_mul_2exp (C0, C0, 2 * m);
     457    mpz_submul (C0, S0, S0);
     458    mpz_sqrt (C0, C0);
     459  
     460    return m;
     461  }
     462  
     463  /* Put in s and c approximations of sin(x) and cos(x) respectively.
     464     Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
     465     Return err such that the relative error is bounded by 2^err ulps.
     466  */
     467  static int
     468  sincos_aux (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
     469  {
     470    mpfr_prec_t prec_s, sh;
     471    mpz_t Q, S, C, Q2, S2, C2, y;
     472    mpfr_t x2;
     473    unsigned long l, l2, j, err;
     474  
     475    MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
     476  
     477    prec_s = MPFR_PREC(s);
     478  
     479    mpfr_init2 (x2, MPFR_PREC(x));
     480    mpz_init (Q);
     481    mpz_init (S);
     482    mpz_init (C);
     483    mpz_init (Q2);
     484    mpz_init (S2);
     485    mpz_init (C2);
     486    mpz_init (y);
     487  
     488    mpfr_set (x2, x, MPFR_RNDN); /* exact */
     489    mpz_set_ui (Q, 1);
     490    l = 0;
     491    mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
     492    mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
     493  
     494    /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
     495       S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
     496       'sh-1' is the number of already shifted bits in x2.
     497    */
     498  
     499    for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
     500      {
     501        if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
     502          {
     503            l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
     504            l2 += sh - 1;
     505            mpz_set_ui (Q2, 1);
     506            mpz_set_ui (C2, 1);
     507            mpz_mul_2exp (C2, C2, l2);
     508            mpfr_set_ui (x2, 0, MPFR_RNDN);
     509          }
     510        else
     511          {
     512            /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
     513            mpfr_mul_2ui (x2, x2, sh, MPFR_RNDN); /* exact */
     514            mpfr_get_z (y, x2, MPFR_RNDZ); /* round toward zero: now
     515                                             0 <= x2 < 2^sh, thus
     516                                             0 <= x2/2^(sh-1) < 2^(1-sh) */
     517            if (mpz_cmp_ui (y, 0) == 0)
     518              continue;
     519            mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
     520            l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
     521            /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
     522               and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
     523          }
     524        if (sh == 1) /* S=0, C=1 */
     525          {
     526            l = l2;
     527            mpz_swap (Q, Q2);
     528            mpz_swap (S, S2);
     529            mpz_swap (C, C2);
     530          }
     531        else
     532          {
     533            /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
     534               a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
     535               s <- t - d - e, c <- e - d */
     536            mpz_add (y, S, C); /* a */
     537            mpz_mul (C, C, C2); /* e */
     538            mpz_add (C2, C2, S2); /* b */
     539            mpz_mul (S2, S, S2); /* d */
     540            mpz_mul (y, y, C2); /* a*b */
     541            mpz_sub (S, y, S2); /* t - d */
     542            mpz_sub (S, S, C); /* t - d - e */
     543            mpz_sub (C, C, S2); /* e - d */
     544            mpz_mul (Q, Q, Q2);
     545            /* after j loops, the error is <= (11j-2)*2^(prec_s) */
     546            l += l2;
     547            /* reduce Q to prec_s bits */
     548            l += reduce (Q, Q, prec_s);
     549            /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
     550            l -= reduce2 (S, C, prec_s);
     551          }
     552      }
     553  
     554    j = 11 * j;
     555    for (err = 0; j > 1; j = (j + 1) / 2, err ++);
     556  
     557    mpfr_set_z (s, S, MPFR_RNDN);
     558    mpfr_div_z (s, s, Q, MPFR_RNDN);
     559    mpfr_div_2ui (s, s, l, MPFR_RNDN);
     560  
     561    mpfr_set_z (c, C, MPFR_RNDN);
     562    mpfr_div_z (c, c, Q, MPFR_RNDN);
     563    mpfr_div_2ui (c, c, l, MPFR_RNDN);
     564  
     565    mpz_clear (Q);
     566    mpz_clear (S);
     567    mpz_clear (C);
     568    mpz_clear (Q2);
     569    mpz_clear (S2);
     570    mpz_clear (C2);
     571    mpz_clear (y);
     572    mpfr_clear (x2);
     573    return err;
     574  }
     575  
     576  /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
     577     One of s and c might be NULL, in which case the corresponding value is
     578     not computed.
     579     Assumes s differs from c.
     580   */
     581  int
     582  mpfr_sincos_fast (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd)
     583  {
     584    int inexs, inexc;
     585    mpfr_t x_red, ts, tc;
     586    mpfr_prec_t w;
     587    mpfr_exp_t err, errs, errc;
     588    MPFR_GROUP_DECL (group);
     589    MPFR_ZIV_DECL (loop);
     590  
     591    MPFR_ASSERTN(s != c);
     592    if (s == NULL)
     593      w = MPFR_PREC(c);
     594    else if (c == NULL)
     595      w = MPFR_PREC(s);
     596    else
     597      w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
     598    w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
     599  
     600    MPFR_GROUP_INIT_2(group, w, ts, tc);
     601  
     602    MPFR_ZIV_INIT (loop, w);
     603    for (;;)
     604      {
     605        /* if 0 < x <= Pi/4, we can call sincos_aux directly */
     606        if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
     607          {
     608            err = sincos_aux (ts, tc, x, MPFR_RNDN);
     609          }
     610        /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
     611        else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
     612          {
     613            MPFR_ALIAS(x_red, x, MPFR_SIGN_POS, MPFR_GET_EXP(x));
     614            err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
     615            MPFR_CHANGE_SIGN(ts);
     616          }
     617        else /* argument reduction is needed */
     618          {
     619            long q;
     620            mpfr_t pi;
     621            int neg = 0;
     622  
     623            mpfr_init2 (x_red, w);
     624            mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
     625            mpfr_const_pi (pi, MPFR_RNDN);
     626            mpfr_div_2ui (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
     627            mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
     628            /* x = q * (Pi/2 + eps1) + x_red + eps2,
     629               where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
     630               and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
     631               Since |q| <= x/(Pi/2) <= |x|, we have
     632               q*|eps1| <= 2^(-w), thus
     633               |x - q * Pi/2 - x_red| <= 2^(1-w) */
     634            /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
     635            if (MPFR_IS_NEG(x_red))
     636              {
     637                mpfr_neg (x_red, x_red, MPFR_RNDN);
     638                neg = 1;
     639              }
     640            err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
     641            err ++; /* to take into account the argument reduction */
     642            if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
     643              mpfr_neg (ts, ts, MPFR_RNDN);
     644            if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
     645              {
     646                mpfr_neg (ts, ts, MPFR_RNDN);
     647                mpfr_neg (tc, tc, MPFR_RNDN);
     648              }
     649            if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
     650              {
     651                mpfr_neg (ts, ts, MPFR_RNDN);
     652                mpfr_swap (ts, tc);
     653              }
     654            mpfr_clear (x_red);
     655            mpfr_clear (pi);
     656          }
     657        /* adjust errors with respect to absolute values */
     658        errs = err - MPFR_EXP(ts);
     659        errc = err - MPFR_EXP(tc);
     660        if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
     661            (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
     662          break;
     663        MPFR_ZIV_NEXT (loop, w);
     664        MPFR_GROUP_REPREC_2(group, w, ts, tc);
     665      }
     666    MPFR_ZIV_FREE (loop);
     667  
     668    inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
     669    inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
     670  
     671    MPFR_GROUP_CLEAR (group);
     672    return INEX(inexs,inexc);
     673  }