(root)/
gmp-6.3.0/
mpn/
generic/
powm.c
       1  /* mpn_powm -- Compute R = U^E mod M.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund.
       4  
       5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  /*
      39    BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
      40  
      41    1. W <- U
      42  
      43    2. T <- (B^n * U) mod M                Convert to REDC form
      44  
      45    3. Compute table U^1, U^3, U^5... of E-dependent size
      46  
      47    4. While there are more bits in E
      48         W <- power left-to-right base-k
      49  
      50  
      51    TODO:
      52  
      53     * Make getbits a macro, thereby allowing it to update the index operand.
      54       That will simplify the code using getbits.  (Perhaps make getbits' sibling
      55       getbit then have similar form, for symmetry.)
      56  
      57     * Write an itch function.  Or perhaps get rid of tp parameter since the huge
      58       pp area is allocated locally anyway?
      59  
      60     * Choose window size without looping.  (Superoptimize or think(tm).)
      61  
      62     * Handle small bases with initial, reduction-free exponentiation.
      63  
      64     * Call new division functions, not mpn_tdiv_qr.
      65  
      66     * Consider special code for one-limb M.
      67  
      68     * How should we handle the redc1/redc2/redc_n choice?
      69       - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
      70       - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
      71       - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
      72       This disregards the addmul_N constant term, but we could think of
      73       that as part of the respective mullo.
      74  
      75     * When U (the base) is small, we should start the exponentiation with plain
      76       operations, then convert that partial result to REDC form.
      77  
      78     * When U is just one limb, should it be handled without the k-ary tricks?
      79       We could keep a factor of B^n in W, but use U' = BU as base.  After
      80       multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
      81       mod M.
      82  */
      83  
      84  #include "gmp-impl.h"
      85  #include "longlong.h"
      86  
      87  #undef MPN_REDC_0
      88  #define MPN_REDC_0(r0, u1, u0, m0, invm)				\
      89    do {									\
      90      mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy;				\
      91      _u0 = (u0);								\
      92      _m0 = (m0);								\
      93      umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK);	\
      94      ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0);			\
      95      _u1 = (u1);								\
      96      _r0 = _u1 - _p1;							\
      97      _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */			\
      98      (r0) = _r0 & GMP_NUMB_MASK;						\
      99    } while (0)
     100  
     101  #undef MPN_REDC_1
     102  #if HAVE_NATIVE_mpn_sbpi1_bdiv_r
     103  #define MPN_REDC_1(rp, up, mp, n, invm)					\
     104    do {									\
     105      mp_limb_t cy;							\
     106      cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
     107      if (cy != 0)							\
     108        mpn_sub_n (rp, up + n, mp, n);					\
     109      else								\
     110        MPN_COPY (rp, up + n, n);						\
     111    } while (0)
     112  #else
     113  #define MPN_REDC_1(rp, up, mp, n, invm)					\
     114    do {									\
     115      mp_limb_t cy;							\
     116      cy = mpn_redc_1 (rp, up, mp, n, invm);				\
     117      if (cy != 0)							\
     118        mpn_sub_n (rp, rp, mp, n);					\
     119    } while (0)
     120  #endif
     121  
     122  #undef MPN_REDC_2
     123  #define MPN_REDC_2(rp, up, mp, n, mip)					\
     124    do {									\
     125      mp_limb_t cy;							\
     126      cy = mpn_redc_2 (rp, up, mp, n, mip);				\
     127      if (cy != 0)							\
     128        mpn_sub_n (rp, rp, mp, n);					\
     129    } while (0)
     130  
     131  #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
     132  #define WANT_REDC_2 1
     133  #endif
     134  
     135  #define getbit(p,bi) \
     136    ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
     137  
     138  static inline mp_limb_t
     139  getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
     140  {
     141    int nbits_in_r;
     142    mp_limb_t r;
     143    mp_size_t i;
     144  
     145    if (bi <= nbits)
     146      {
     147        return p[0] & (((mp_limb_t) 1 << bi) - 1);
     148      }
     149    else
     150      {
     151        bi -= nbits;			/* bit index of low bit to extract */
     152        i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
     153        bi %= GMP_NUMB_BITS;		/* bit index in low word */
     154        r = p[i] >> bi;			/* extract (low) bits */
     155        nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
     156        if (nbits_in_r < nbits)		/* did we get enough bits? */
     157  	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
     158        return r & (((mp_limb_t) 1 << nbits) - 1);
     159      }
     160  }
     161  
     162  static inline int
     163  win_size (mp_bitcnt_t eb)
     164  {
     165    int k;
     166    static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
     167    for (k = 0; eb > x[k++]; )
     168      ;
     169    return k;
     170  }
     171  
     172  /* Convert U to REDC form, U_r = B^n * U mod M */
     173  static void
     174  redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
     175  {
     176    mp_ptr tp, qp;
     177    TMP_DECL;
     178    TMP_MARK;
     179  
     180    TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
     181  
     182    MPN_ZERO (tp, n);
     183    MPN_COPY (tp + n, up, un);
     184    mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
     185    TMP_FREE;
     186  }
     187  
     188  #if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
     189  #undef mpn_rsblsh1_n_ip2
     190  #if HAVE_NATIVE_mpn_rsblsh1_n
     191  #define mpn_rsblsh1_n_ip2(a,b,n)	mpn_rsblsh1_n(a,b,a,n)
     192  #else
     193  #define mpn_rsblsh1_n_ip2(a,b,n)				\
     194    do								\
     195      {								\
     196        mpn_lshift (a, a, n, 1);					\
     197        mpn_sub_n (a, a, b, n);					\
     198      } while (0)
     199  #endif
     200  #endif
     201  
     202  #define INNERLOOP2						\
     203    do								\
     204      {								\
     205        MPN_SQR (tp, rp, n);					\
     206        MPN_REDUCE (rp, tp, mp, n, mip);				\
     207        if (mpn_cmp (rp, mp, n) >= 0)				\
     208  	ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n));		\
     209        if (getbit (ep, ebi) != 0)				\
     210  	{							\
     211  	  if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0)	\
     212  	    ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));		\
     213  	  else							\
     214  	    mpn_rsblsh1_n_ip2 (rp, mp, n);			\
     215  	}							\
     216      } while (--ebi != 0)
     217  
     218  /* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
     219     Requires that mp[n-1..0] is odd and > 1.
     220     Requires that ep[en-1..0] is > 1.
     221     Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
     222  static void
     223  mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
     224  	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
     225  {
     226    mp_limb_t ip[2], *mip;
     227    mp_bitcnt_t ebi, mbi, tbi;
     228    mp_size_t tn;
     229    int count;
     230    TMP_DECL;
     231  
     232    ASSERT (en > 1 || (en == 1 && ep[0] > 1));
     233    ASSERT (n > 0 && (mp[0] & 1) != 0);
     234  
     235    MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
     236    MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
     237  
     238    if (LIKELY (mbi <= GMP_NUMB_MAX))
     239      {
     240        count_leading_zeros(count, (mp_limb_t) mbi);
     241        count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
     242      }
     243    else
     244      {
     245        mp_bitcnt_t tc = mbi;
     246  
     247        count = 0;
     248        do { ++count; } while ((tc >>= 1) != 0);
     249      }
     250  
     251    tbi = getbits (ep, ebi, count);
     252    if (tbi >= mbi)
     253      {
     254        --count;
     255        ASSERT ((tbi >> count) == 1);
     256        tbi >>= 1;
     257        ASSERT (tbi < mbi);
     258        ASSERT (ebi > count);
     259      }
     260    else if (ebi <= count)
     261      {
     262        MPN_FILL (rp, n, 0);
     263        rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
     264        return;
     265      }
     266    ebi -= count;
     267  
     268    if (n == 1)
     269      {
     270        mp_limb_t r0, m0, invm;
     271        m0 = *mp;
     272  
     273        /* redcify (rp, tp, tn + 1, mp, n); */
     274        /* TODO: test direct use of udiv_qrnnd */
     275        ASSERT (tbi < GMP_LIMB_BITS);
     276        tp[1] = CNST_LIMB (1) << tbi;
     277        tp[0] = CNST_LIMB (0);
     278        r0 = mpn_mod_1 (tp, 2, m0);
     279  
     280        binvert_limb (invm, m0);
     281        do
     282  	{
     283  	  mp_limb_t t0, t1, t2;
     284  	  /* MPN_SQR (tp, rp, n);			*/
     285  	  umul_ppmm (t1, t0, r0, r0);
     286  	  /* MPN_REDUCE (rp, tp, mp, n, mip);		*/
     287  	  MPN_REDC_0(r0, t1, t0, m0, invm);
     288  
     289  	  t2 = r0 << 1;
     290  	  t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
     291  	  r0 = getbit (ep, ebi) != 0 ? t2 : r0;
     292  	} while (--ebi != 0);
     293  
     294        /* tp[1] = 0; tp[0] = r0;	*/
     295        /* MPN_REDUCE (rp, tp, mp, n, mip);	*/
     296        MPN_REDC_0(*rp, 0, r0, m0, invm);
     297  
     298        return;
     299      }
     300  
     301    TMP_MARK;
     302  
     303  #if WANT_REDC_2
     304    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     305      {
     306        mip = ip;
     307        binvert_limb (ip[0], mp[0]);
     308        ip[0] = -ip[0];
     309      }
     310    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     311      {
     312        mip = ip;
     313        mpn_binvert (ip, mp, 2, tp);
     314        ip[0] = -ip[0]; ip[1] = ~ip[1];
     315      }
     316  #else
     317    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     318      {
     319        mip = ip;
     320        binvert_limb (ip[0], mp[0]);
     321        ip[0] = -ip[0];
     322      }
     323  #endif
     324    else
     325      {
     326        mip = TMP_ALLOC_LIMBS (n);
     327        mpn_binvert (mip, mp, n, tp);
     328      }
     329  
     330    tn = tbi / GMP_LIMB_BITS;
     331    MPN_ZERO (tp, tn);
     332    tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
     333  
     334    redcify (rp, tp, tn + 1, mp, n);
     335  
     336  #if WANT_REDC_2
     337    if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
     338      {
     339        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     340  	{
     341  	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
     342  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     343  	    {
     344  #undef MPN_SQR
     345  #undef MPN_REDUCE
     346  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     347  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     348  	      INNERLOOP2;
     349  	    }
     350  	  else
     351  	    {
     352  #undef MPN_SQR
     353  #undef MPN_REDUCE
     354  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     355  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     356  	      INNERLOOP2;
     357  	    }
     358  	}
     359        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     360  	{
     361  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     362  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     363  	    {
     364  #undef MPN_SQR
     365  #undef MPN_REDUCE
     366  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     367  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     368  	      INNERLOOP2;
     369  	    }
     370  	  else
     371  	    {
     372  #undef MPN_SQR
     373  #undef MPN_REDUCE
     374  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     375  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     376  	      INNERLOOP2;
     377  	    }
     378  	}
     379        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     380  	{
     381  #undef MPN_SQR
     382  #undef MPN_REDUCE
     383  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     384  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     385  	  INNERLOOP2;
     386  	}
     387        else
     388  	{
     389  #undef MPN_SQR
     390  #undef MPN_REDUCE
     391  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     392  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     393  	  INNERLOOP2;
     394  	}
     395      }
     396    else
     397      {
     398        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     399  	{
     400  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     401  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     402  	    {
     403  #undef MPN_SQR
     404  #undef MPN_REDUCE
     405  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     406  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     407  	      INNERLOOP2;
     408  	    }
     409  	  else
     410  	    {
     411  #undef MPN_SQR
     412  #undef MPN_REDUCE
     413  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     414  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     415  	      INNERLOOP2;
     416  	    }
     417  	}
     418        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     419  	{
     420  #undef MPN_SQR
     421  #undef MPN_REDUCE
     422  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     423  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     424  	  INNERLOOP2;
     425  	}
     426        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     427  	{
     428  #undef MPN_SQR
     429  #undef MPN_REDUCE
     430  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     431  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     432  	  INNERLOOP2;
     433  	}
     434        else
     435  	{
     436  #undef MPN_SQR
     437  #undef MPN_REDUCE
     438  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     439  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     440  	  INNERLOOP2;
     441  	}
     442      }
     443  
     444  #else  /* WANT_REDC_2 */
     445  
     446    if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
     447      {
     448        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     449  	{
     450  	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
     451  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     452  	    {
     453  #undef MPN_SQR
     454  #undef MPN_REDUCE
     455  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     456  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     457  	      INNERLOOP2;
     458  	    }
     459  	  else
     460  	    {
     461  #undef MPN_SQR
     462  #undef MPN_REDUCE
     463  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     464  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     465  	      INNERLOOP2;
     466  	    }
     467  	}
     468        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     469  	{
     470  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     471  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     472  	    {
     473  #undef MPN_SQR
     474  #undef MPN_REDUCE
     475  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     476  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     477  	      INNERLOOP2;
     478  	    }
     479  	  else
     480  	    {
     481  #undef MPN_SQR
     482  #undef MPN_REDUCE
     483  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     484  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     485  	      INNERLOOP2;
     486  	    }
     487  	}
     488        else
     489  	{
     490  #undef MPN_SQR
     491  #undef MPN_REDUCE
     492  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     493  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     494  	  INNERLOOP2;
     495  	}
     496      }
     497    else
     498      {
     499        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     500  	{
     501  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     502  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     503  	    {
     504  #undef MPN_SQR
     505  #undef MPN_REDUCE
     506  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     507  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     508  	      INNERLOOP2;
     509  	    }
     510  	  else
     511  	    {
     512  #undef MPN_SQR
     513  #undef MPN_REDUCE
     514  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     515  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     516  	      INNERLOOP2;
     517  	    }
     518  	}
     519        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     520  	{
     521  #undef MPN_SQR
     522  #undef MPN_REDUCE
     523  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     524  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     525  	  INNERLOOP2;
     526  	}
     527        else
     528  	{
     529  #undef MPN_SQR
     530  #undef MPN_REDUCE
     531  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     532  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     533  	  INNERLOOP2;
     534  	}
     535      }
     536  #endif  /* WANT_REDC_2 */
     537  
     538    MPN_COPY (tp, rp, n);
     539    MPN_FILL (tp + n, n, 0);
     540  
     541  #if WANT_REDC_2
     542    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     543      MPN_REDC_1 (rp, tp, mp, n, ip[0]);
     544    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     545      MPN_REDC_2 (rp, tp, mp, n, mip);
     546  #else
     547    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     548      MPN_REDC_1 (rp, tp, mp, n, ip[0]);
     549  #endif
     550    else
     551      mpn_redc_n (rp, tp, mp, n, mip);
     552  
     553    if (mpn_cmp (rp, mp, n) >= 0)
     554      mpn_sub_n (rp, rp, mp, n);
     555  
     556    TMP_FREE;
     557  }
     558  
     559  /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
     560     Requires that mp[n-1..0] is odd.
     561     Requires that ep[en-1..0] is > 1.
     562     Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
     563  void
     564  mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
     565  	  mp_srcptr ep, mp_size_t en,
     566  	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
     567  {
     568    mp_limb_t ip[2], *mip;
     569    int cnt;
     570    mp_bitcnt_t ebi;
     571    int windowsize, this_windowsize;
     572    mp_limb_t expbits;
     573    mp_ptr pp, this_pp;
     574    long i;
     575    TMP_DECL;
     576  
     577    ASSERT (en > 1 || (en == 1 && ep[0] > 1));
     578    ASSERT (n >= 1 && ((mp[0] & 1) != 0));
     579  
     580    if (bn == 1 && bp[0] == 2)
     581      {
     582        mpn_2powm (rp, ep, en, mp, n, tp);
     583        return;
     584      }
     585  
     586    TMP_MARK;
     587  
     588    MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
     589  
     590  #if 0
     591    if (bn < n)
     592      {
     593        /* Do the first few exponent bits without mod reductions,
     594  	 until the result is greater than the mod argument.  */
     595        for (;;)
     596  	{
     597  	  mpn_sqr (tp, this_pp, tn);
     598  	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
     599  	  if (getbit (ep, ebi) != 0)
     600  	    mpn_mul (..., tp, tn, bp, bn);
     601  	  ebi--;
     602  	}
     603      }
     604  #endif
     605  
     606    windowsize = win_size (ebi);
     607  
     608  #if WANT_REDC_2
     609    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     610      {
     611        mip = ip;
     612        binvert_limb (mip[0], mp[0]);
     613        mip[0] = -mip[0];
     614      }
     615    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     616      {
     617        mip = ip;
     618        mpn_binvert (mip, mp, 2, tp);
     619        mip[0] = -mip[0]; mip[1] = ~mip[1];
     620      }
     621  #else
     622    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     623      {
     624        mip = ip;
     625        binvert_limb (mip[0], mp[0]);
     626        mip[0] = -mip[0];
     627      }
     628  #endif
     629    else
     630      {
     631        mip = TMP_ALLOC_LIMBS (n);
     632        mpn_binvert (mip, mp, n, tp);
     633      }
     634  
     635    pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
     636  
     637    this_pp = pp;
     638    redcify (this_pp, bp, bn, mp, n);
     639  
     640    /* Store b^2 at rp.  */
     641    mpn_sqr (tp, this_pp, n);
     642  #if 0
     643    if (n == 1) {
     644      MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
     645    } else
     646  #endif
     647  #if WANT_REDC_2
     648    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     649      MPN_REDC_1 (rp, tp, mp, n, mip[0]);
     650    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     651      MPN_REDC_2 (rp, tp, mp, n, mip);
     652  #else
     653    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     654      MPN_REDC_1 (rp, tp, mp, n, mip[0]);
     655  #endif
     656    else
     657      mpn_redc_n (rp, tp, mp, n, mip);
     658  
     659    /* Precompute odd powers of b and put them in the temporary area at pp.  */
     660    for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
     661  #if 1
     662      if (n == 1) {
     663        umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
     664        ++this_pp ;
     665        MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
     666      } else
     667  #endif
     668      {
     669        mpn_mul_n (tp, this_pp, rp, n);
     670        this_pp += n;
     671  #if WANT_REDC_2
     672        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     673  	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
     674        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     675  	MPN_REDC_2 (this_pp, tp, mp, n, mip);
     676  #else
     677        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     678  	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
     679  #endif
     680        else
     681  	mpn_redc_n (this_pp, tp, mp, n, mip);
     682      }
     683  
     684    expbits = getbits (ep, ebi, windowsize);
     685    ebi -= windowsize;
     686  
     687    /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
     688    count_trailing_zeros (cnt, expbits);
     689    ebi += cnt;
     690    expbits >>= cnt;
     691  
     692    MPN_COPY (rp, pp + n * (expbits >> 1), n);
     693  
     694  #define INNERLOOP							\
     695    while (ebi != 0)							\
     696      {									\
     697        while (getbit (ep, ebi) == 0)					\
     698  	{								\
     699  	  MPN_SQR (tp, rp, n);						\
     700  	  MPN_REDUCE (rp, tp, mp, n, mip);				\
     701  	  if (--ebi == 0)						\
     702  	    goto done;							\
     703  	}								\
     704  									\
     705        /* The next bit of the exponent is 1.  Now extract the largest	\
     706  	 block of bits <= windowsize, and such that the least		\
     707  	 significant bit is 1.  */					\
     708  									\
     709        expbits = getbits (ep, ebi, windowsize);				\
     710        this_windowsize = MIN (ebi, windowsize);				\
     711  									\
     712        count_trailing_zeros (cnt, expbits);				\
     713        this_windowsize -= cnt;						\
     714        ebi -= this_windowsize;						\
     715        expbits >>= cnt;							\
     716  									\
     717        do								\
     718  	{								\
     719  	  MPN_SQR (tp, rp, n);						\
     720  	  MPN_REDUCE (rp, tp, mp, n, mip);				\
     721  	}								\
     722        while (--this_windowsize != 0);					\
     723  									\
     724        MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
     725        MPN_REDUCE (rp, tp, mp, n, mip);					\
     726      }
     727  
     728  
     729    if (n == 1)
     730      {
     731  #undef MPN_MUL_N
     732  #undef MPN_SQR
     733  #undef MPN_REDUCE
     734  #define MPN_MUL_N(r,a,b,n)		umul_ppmm((r)[1], *(r), *(a), *(b))
     735  #define MPN_SQR(r,a,n)			umul_ppmm((r)[1], *(r), *(a), *(a))
     736  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
     737        INNERLOOP;
     738      }
     739    else
     740  #if WANT_REDC_2
     741    if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
     742      {
     743        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     744  	{
     745  	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
     746  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     747  	    {
     748  #undef MPN_MUL_N
     749  #undef MPN_SQR
     750  #undef MPN_REDUCE
     751  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     752  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     753  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     754  	      INNERLOOP;
     755  	    }
     756  	  else
     757  	    {
     758  #undef MPN_MUL_N
     759  #undef MPN_SQR
     760  #undef MPN_REDUCE
     761  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     762  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     763  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     764  	      INNERLOOP;
     765  	    }
     766  	}
     767        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     768  	{
     769  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     770  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     771  	    {
     772  #undef MPN_MUL_N
     773  #undef MPN_SQR
     774  #undef MPN_REDUCE
     775  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     776  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     777  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     778  	      INNERLOOP;
     779  	    }
     780  	  else
     781  	    {
     782  #undef MPN_MUL_N
     783  #undef MPN_SQR
     784  #undef MPN_REDUCE
     785  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     786  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     787  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     788  	      INNERLOOP;
     789  	    }
     790  	}
     791        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     792  	{
     793  #undef MPN_MUL_N
     794  #undef MPN_SQR
     795  #undef MPN_REDUCE
     796  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     797  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     798  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     799  	  INNERLOOP;
     800  	}
     801        else
     802  	{
     803  #undef MPN_MUL_N
     804  #undef MPN_SQR
     805  #undef MPN_REDUCE
     806  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     807  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     808  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     809  	  INNERLOOP;
     810  	}
     811      }
     812    else
     813      {
     814        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     815  	{
     816  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     817  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     818  	    {
     819  #undef MPN_MUL_N
     820  #undef MPN_SQR
     821  #undef MPN_REDUCE
     822  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     823  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     824  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     825  	      INNERLOOP;
     826  	    }
     827  	  else
     828  	    {
     829  #undef MPN_MUL_N
     830  #undef MPN_SQR
     831  #undef MPN_REDUCE
     832  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     833  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     834  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     835  	      INNERLOOP;
     836  	    }
     837  	}
     838        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     839  	{
     840  #undef MPN_MUL_N
     841  #undef MPN_SQR
     842  #undef MPN_REDUCE
     843  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     844  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     845  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     846  	  INNERLOOP;
     847  	}
     848        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     849  	{
     850  #undef MPN_MUL_N
     851  #undef MPN_SQR
     852  #undef MPN_REDUCE
     853  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     854  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     855  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
     856  	  INNERLOOP;
     857  	}
     858        else
     859  	{
     860  #undef MPN_MUL_N
     861  #undef MPN_SQR
     862  #undef MPN_REDUCE
     863  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     864  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     865  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     866  	  INNERLOOP;
     867  	}
     868      }
     869  
     870  #else  /* WANT_REDC_2 */
     871  
     872    if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
     873      {
     874        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     875  	{
     876  	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
     877  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     878  	    {
     879  #undef MPN_MUL_N
     880  #undef MPN_SQR
     881  #undef MPN_REDUCE
     882  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     883  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     884  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     885  	      INNERLOOP;
     886  	    }
     887  	  else
     888  	    {
     889  #undef MPN_MUL_N
     890  #undef MPN_SQR
     891  #undef MPN_REDUCE
     892  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     893  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     894  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     895  	      INNERLOOP;
     896  	    }
     897  	}
     898        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     899  	{
     900  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     901  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     902  	    {
     903  #undef MPN_MUL_N
     904  #undef MPN_SQR
     905  #undef MPN_REDUCE
     906  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     907  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     908  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     909  	      INNERLOOP;
     910  	    }
     911  	  else
     912  	    {
     913  #undef MPN_MUL_N
     914  #undef MPN_SQR
     915  #undef MPN_REDUCE
     916  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     917  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     918  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     919  	      INNERLOOP;
     920  	    }
     921  	}
     922        else
     923  	{
     924  #undef MPN_MUL_N
     925  #undef MPN_SQR
     926  #undef MPN_REDUCE
     927  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     928  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     929  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     930  	  INNERLOOP;
     931  	}
     932      }
     933    else
     934      {
     935        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
     936  	{
     937  	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
     938  	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
     939  	    {
     940  #undef MPN_MUL_N
     941  #undef MPN_SQR
     942  #undef MPN_REDUCE
     943  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     944  #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
     945  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     946  	      INNERLOOP;
     947  	    }
     948  	  else
     949  	    {
     950  #undef MPN_MUL_N
     951  #undef MPN_SQR
     952  #undef MPN_REDUCE
     953  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
     954  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
     955  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     956  	      INNERLOOP;
     957  	    }
     958  	}
     959        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     960  	{
     961  #undef MPN_MUL_N
     962  #undef MPN_SQR
     963  #undef MPN_REDUCE
     964  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     965  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     966  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
     967  	  INNERLOOP;
     968  	}
     969        else
     970  	{
     971  #undef MPN_MUL_N
     972  #undef MPN_SQR
     973  #undef MPN_REDUCE
     974  #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
     975  #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
     976  #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
     977  	  INNERLOOP;
     978  	}
     979      }
     980  #endif  /* WANT_REDC_2 */
     981  
     982   done:
     983  
     984    MPN_COPY (tp, rp, n);
     985    MPN_ZERO (tp + n, n);
     986  
     987  #if WANT_REDC_2
     988    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     989      MPN_REDC_1 (rp, tp, mp, n, mip[0]);
     990    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     991      MPN_REDC_2 (rp, tp, mp, n, mip);
     992  #else
     993    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     994      MPN_REDC_1 (rp, tp, mp, n, mip[0]);
     995  #endif
     996    else
     997      mpn_redc_n (rp, tp, mp, n, mip);
     998  
     999    if (mpn_cmp (rp, mp, n) >= 0)
    1000      mpn_sub_n (rp, rp, mp, n);
    1001  
    1002    TMP_FREE;
    1003  }