1 /* mpn_powm -- Compute R = U^E mod M.
2
3 Contributed to the GNU project by Torbjorn Granlund.
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2007-2012, 2019-2021 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 /*
39 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40
41 1. W <- U
42
43 2. T <- (B^n * U) mod M Convert to REDC form
44
45 3. Compute table U^1, U^3, U^5... of E-dependent size
46
47 4. While there are more bits in E
48 W <- power left-to-right base-k
49
50
51 TODO:
52
53 * Make getbits a macro, thereby allowing it to update the index operand.
54 That will simplify the code using getbits. (Perhaps make getbits' sibling
55 getbit then have similar form, for symmetry.)
56
57 * Write an itch function. Or perhaps get rid of tp parameter since the huge
58 pp area is allocated locally anyway?
59
60 * Choose window size without looping. (Superoptimize or think(tm).)
61
62 * Handle small bases with initial, reduction-free exponentiation.
63
64 * Call new division functions, not mpn_tdiv_qr.
65
66 * Consider special code for one-limb M.
67
68 * How should we handle the redc1/redc2/redc_n choice?
69 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
70 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72 This disregards the addmul_N constant term, but we could think of
73 that as part of the respective mullo.
74
75 * When U (the base) is small, we should start the exponentiation with plain
76 operations, then convert that partial result to REDC form.
77
78 * When U is just one limb, should it be handled without the k-ary tricks?
79 We could keep a factor of B^n in W, but use U' = BU as base. After
80 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81 mod M.
82 */
83
84 #include "gmp-impl.h"
85 #include "longlong.h"
86
87 #undef MPN_REDC_0
88 #define MPN_REDC_0(r0, u1, u0, m0, invm) \
89 do { \
90 mp_limb_t _p1, _u1, _u0, _m0, _r0, _dummy; \
91 _u0 = (u0); \
92 _m0 = (m0); \
93 umul_ppmm (_p1, _dummy, _m0, (_u0 * (invm)) & GMP_NUMB_MASK); \
94 ASSERT (((_u0 - _dummy) & GMP_NUMB_MASK) == 0); \
95 _u1 = (u1); \
96 _r0 = _u1 - _p1; \
97 _r0 = _u1 < _p1 ? _r0 + _m0 : _r0; /* _u1 < _r0 */ \
98 (r0) = _r0 & GMP_NUMB_MASK; \
99 } while (0)
100
101 #undef MPN_REDC_1
102 #if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103 #define MPN_REDC_1(rp, up, mp, n, invm) \
104 do { \
105 mp_limb_t cy; \
106 cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \
107 if (cy != 0) \
108 mpn_sub_n (rp, up + n, mp, n); \
109 else \
110 MPN_COPY (rp, up + n, n); \
111 } while (0)
112 #else
113 #define MPN_REDC_1(rp, up, mp, n, invm) \
114 do { \
115 mp_limb_t cy; \
116 cy = mpn_redc_1 (rp, up, mp, n, invm); \
117 if (cy != 0) \
118 mpn_sub_n (rp, rp, mp, n); \
119 } while (0)
120 #endif
121
122 #undef MPN_REDC_2
123 #define MPN_REDC_2(rp, up, mp, n, mip) \
124 do { \
125 mp_limb_t cy; \
126 cy = mpn_redc_2 (rp, up, mp, n, mip); \
127 if (cy != 0) \
128 mpn_sub_n (rp, rp, mp, n); \
129 } while (0)
130
131 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132 #define WANT_REDC_2 1
133 #endif
134
135 #define getbit(p,bi) \
136 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137
138 static inline mp_limb_t
139 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140 {
141 int nbits_in_r;
142 mp_limb_t r;
143 mp_size_t i;
144
145 if (bi <= nbits)
146 {
147 return p[0] & (((mp_limb_t) 1 << bi) - 1);
148 }
149 else
150 {
151 bi -= nbits; /* bit index of low bit to extract */
152 i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
153 bi %= GMP_NUMB_BITS; /* bit index in low word */
154 r = p[i] >> bi; /* extract (low) bits */
155 nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
156 if (nbits_in_r < nbits) /* did we get enough bits? */
157 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
158 return r & (((mp_limb_t) 1 << nbits) - 1);
159 }
160 }
161
162 static inline int
163 win_size (mp_bitcnt_t eb)
164 {
165 int k;
166 static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167 for (k = 0; eb > x[k++]; )
168 ;
169 return k;
170 }
171
172 /* Convert U to REDC form, U_r = B^n * U mod M */
173 static void
174 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175 {
176 mp_ptr tp, qp;
177 TMP_DECL;
178 TMP_MARK;
179
180 TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181
182 MPN_ZERO (tp, n);
183 MPN_COPY (tp + n, up, un);
184 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185 TMP_FREE;
186 }
187
188 #if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
189 #undef mpn_rsblsh1_n_ip2
190 #if HAVE_NATIVE_mpn_rsblsh1_n
191 #define mpn_rsblsh1_n_ip2(a,b,n) mpn_rsblsh1_n(a,b,a,n)
192 #else
193 #define mpn_rsblsh1_n_ip2(a,b,n) \
194 do \
195 { \
196 mpn_lshift (a, a, n, 1); \
197 mpn_sub_n (a, a, b, n); \
198 } while (0)
199 #endif
200 #endif
201
202 #define INNERLOOP2 \
203 do \
204 { \
205 MPN_SQR (tp, rp, n); \
206 MPN_REDUCE (rp, tp, mp, n, mip); \
207 if (mpn_cmp (rp, mp, n) >= 0) \
208 ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n)); \
209 if (getbit (ep, ebi) != 0) \
210 { \
211 if (rp[n - 1] >> (mbi - 1) % GMP_LIMB_BITS == 0) \
212 ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1)); \
213 else \
214 mpn_rsblsh1_n_ip2 (rp, mp, n); \
215 } \
216 } while (--ebi != 0)
217
218 /* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
219 Requires that mp[n-1..0] is odd and > 1.
220 Requires that ep[en-1..0] is > 1.
221 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
222 static void
223 mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
224 mp_srcptr mp, mp_size_t n, mp_ptr tp)
225 {
226 mp_limb_t ip[2], *mip;
227 mp_bitcnt_t ebi, mbi, tbi;
228 mp_size_t tn;
229 int count;
230 TMP_DECL;
231
232 ASSERT (en > 1 || (en == 1 && ep[0] > 1));
233 ASSERT (n > 0 && (mp[0] & 1) != 0);
234
235 MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
236 MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
237
238 if (LIKELY (mbi <= GMP_NUMB_MAX))
239 {
240 count_leading_zeros(count, (mp_limb_t) mbi);
241 count = GMP_NUMB_BITS - (count - GMP_NAIL_BITS);
242 }
243 else
244 {
245 mp_bitcnt_t tc = mbi;
246
247 count = 0;
248 do { ++count; } while ((tc >>= 1) != 0);
249 }
250
251 tbi = getbits (ep, ebi, count);
252 if (tbi >= mbi)
253 {
254 --count;
255 ASSERT ((tbi >> count) == 1);
256 tbi >>= 1;
257 ASSERT (tbi < mbi);
258 ASSERT (ebi > count);
259 }
260 else if (ebi <= count)
261 {
262 MPN_FILL (rp, n, 0);
263 rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
264 return;
265 }
266 ebi -= count;
267
268 if (n == 1)
269 {
270 mp_limb_t r0, m0, invm;
271 m0 = *mp;
272
273 /* redcify (rp, tp, tn + 1, mp, n); */
274 /* TODO: test direct use of udiv_qrnnd */
275 ASSERT (tbi < GMP_LIMB_BITS);
276 tp[1] = CNST_LIMB (1) << tbi;
277 tp[0] = CNST_LIMB (0);
278 r0 = mpn_mod_1 (tp, 2, m0);
279
280 binvert_limb (invm, m0);
281 do
282 {
283 mp_limb_t t0, t1, t2;
284 /* MPN_SQR (tp, rp, n); */
285 umul_ppmm (t1, t0, r0, r0);
286 /* MPN_REDUCE (rp, tp, mp, n, mip); */
287 MPN_REDC_0(r0, t1, t0, m0, invm);
288
289 t2 = r0 << 1;
290 t2 = r0 > (m0 >> 1) ? t2 - m0 : t2;
291 r0 = getbit (ep, ebi) != 0 ? t2 : r0;
292 } while (--ebi != 0);
293
294 /* tp[1] = 0; tp[0] = r0; */
295 /* MPN_REDUCE (rp, tp, mp, n, mip); */
296 MPN_REDC_0(*rp, 0, r0, m0, invm);
297
298 return;
299 }
300
301 TMP_MARK;
302
303 #if WANT_REDC_2
304 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
305 {
306 mip = ip;
307 binvert_limb (ip[0], mp[0]);
308 ip[0] = -ip[0];
309 }
310 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
311 {
312 mip = ip;
313 mpn_binvert (ip, mp, 2, tp);
314 ip[0] = -ip[0]; ip[1] = ~ip[1];
315 }
316 #else
317 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
318 {
319 mip = ip;
320 binvert_limb (ip[0], mp[0]);
321 ip[0] = -ip[0];
322 }
323 #endif
324 else
325 {
326 mip = TMP_ALLOC_LIMBS (n);
327 mpn_binvert (mip, mp, n, tp);
328 }
329
330 tn = tbi / GMP_LIMB_BITS;
331 MPN_ZERO (tp, tn);
332 tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
333
334 redcify (rp, tp, tn + 1, mp, n);
335
336 #if WANT_REDC_2
337 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
338 {
339 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
340 {
341 if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
342 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
343 {
344 #undef MPN_SQR
345 #undef MPN_REDUCE
346 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
347 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
348 INNERLOOP2;
349 }
350 else
351 {
352 #undef MPN_SQR
353 #undef MPN_REDUCE
354 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
355 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
356 INNERLOOP2;
357 }
358 }
359 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
360 {
361 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
362 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
363 {
364 #undef MPN_SQR
365 #undef MPN_REDUCE
366 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
367 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
368 INNERLOOP2;
369 }
370 else
371 {
372 #undef MPN_SQR
373 #undef MPN_REDUCE
374 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
375 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
376 INNERLOOP2;
377 }
378 }
379 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
380 {
381 #undef MPN_SQR
382 #undef MPN_REDUCE
383 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
384 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
385 INNERLOOP2;
386 }
387 else
388 {
389 #undef MPN_SQR
390 #undef MPN_REDUCE
391 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
392 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
393 INNERLOOP2;
394 }
395 }
396 else
397 {
398 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
399 {
400 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
401 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
402 {
403 #undef MPN_SQR
404 #undef MPN_REDUCE
405 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
406 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
407 INNERLOOP2;
408 }
409 else
410 {
411 #undef MPN_SQR
412 #undef MPN_REDUCE
413 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
414 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
415 INNERLOOP2;
416 }
417 }
418 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
419 {
420 #undef MPN_SQR
421 #undef MPN_REDUCE
422 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
423 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
424 INNERLOOP2;
425 }
426 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
427 {
428 #undef MPN_SQR
429 #undef MPN_REDUCE
430 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
431 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
432 INNERLOOP2;
433 }
434 else
435 {
436 #undef MPN_SQR
437 #undef MPN_REDUCE
438 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
439 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
440 INNERLOOP2;
441 }
442 }
443
444 #else /* WANT_REDC_2 */
445
446 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
447 {
448 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
449 {
450 if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
451 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
452 {
453 #undef MPN_SQR
454 #undef MPN_REDUCE
455 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
456 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
457 INNERLOOP2;
458 }
459 else
460 {
461 #undef MPN_SQR
462 #undef MPN_REDUCE
463 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
464 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
465 INNERLOOP2;
466 }
467 }
468 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
469 {
470 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
471 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
472 {
473 #undef MPN_SQR
474 #undef MPN_REDUCE
475 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
476 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
477 INNERLOOP2;
478 }
479 else
480 {
481 #undef MPN_SQR
482 #undef MPN_REDUCE
483 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
484 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
485 INNERLOOP2;
486 }
487 }
488 else
489 {
490 #undef MPN_SQR
491 #undef MPN_REDUCE
492 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
493 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
494 INNERLOOP2;
495 }
496 }
497 else
498 {
499 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
500 {
501 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
502 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
503 {
504 #undef MPN_SQR
505 #undef MPN_REDUCE
506 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
507 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
508 INNERLOOP2;
509 }
510 else
511 {
512 #undef MPN_SQR
513 #undef MPN_REDUCE
514 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
515 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
516 INNERLOOP2;
517 }
518 }
519 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
520 {
521 #undef MPN_SQR
522 #undef MPN_REDUCE
523 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
524 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
525 INNERLOOP2;
526 }
527 else
528 {
529 #undef MPN_SQR
530 #undef MPN_REDUCE
531 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
532 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
533 INNERLOOP2;
534 }
535 }
536 #endif /* WANT_REDC_2 */
537
538 MPN_COPY (tp, rp, n);
539 MPN_FILL (tp + n, n, 0);
540
541 #if WANT_REDC_2
542 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
543 MPN_REDC_1 (rp, tp, mp, n, ip[0]);
544 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
545 MPN_REDC_2 (rp, tp, mp, n, mip);
546 #else
547 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
548 MPN_REDC_1 (rp, tp, mp, n, ip[0]);
549 #endif
550 else
551 mpn_redc_n (rp, tp, mp, n, mip);
552
553 if (mpn_cmp (rp, mp, n) >= 0)
554 mpn_sub_n (rp, rp, mp, n);
555
556 TMP_FREE;
557 }
558
559 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
560 Requires that mp[n-1..0] is odd.
561 Requires that ep[en-1..0] is > 1.
562 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
563 void
564 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
565 mp_srcptr ep, mp_size_t en,
566 mp_srcptr mp, mp_size_t n, mp_ptr tp)
567 {
568 mp_limb_t ip[2], *mip;
569 int cnt;
570 mp_bitcnt_t ebi;
571 int windowsize, this_windowsize;
572 mp_limb_t expbits;
573 mp_ptr pp, this_pp;
574 long i;
575 TMP_DECL;
576
577 ASSERT (en > 1 || (en == 1 && ep[0] > 1));
578 ASSERT (n >= 1 && ((mp[0] & 1) != 0));
579
580 if (bn == 1 && bp[0] == 2)
581 {
582 mpn_2powm (rp, ep, en, mp, n, tp);
583 return;
584 }
585
586 TMP_MARK;
587
588 MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
589
590 #if 0
591 if (bn < n)
592 {
593 /* Do the first few exponent bits without mod reductions,
594 until the result is greater than the mod argument. */
595 for (;;)
596 {
597 mpn_sqr (tp, this_pp, tn);
598 tn = tn * 2 - 1, tn += tp[tn] != 0;
599 if (getbit (ep, ebi) != 0)
600 mpn_mul (..., tp, tn, bp, bn);
601 ebi--;
602 }
603 }
604 #endif
605
606 windowsize = win_size (ebi);
607
608 #if WANT_REDC_2
609 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
610 {
611 mip = ip;
612 binvert_limb (mip[0], mp[0]);
613 mip[0] = -mip[0];
614 }
615 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
616 {
617 mip = ip;
618 mpn_binvert (mip, mp, 2, tp);
619 mip[0] = -mip[0]; mip[1] = ~mip[1];
620 }
621 #else
622 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
623 {
624 mip = ip;
625 binvert_limb (mip[0], mp[0]);
626 mip[0] = -mip[0];
627 }
628 #endif
629 else
630 {
631 mip = TMP_ALLOC_LIMBS (n);
632 mpn_binvert (mip, mp, n, tp);
633 }
634
635 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
636
637 this_pp = pp;
638 redcify (this_pp, bp, bn, mp, n);
639
640 /* Store b^2 at rp. */
641 mpn_sqr (tp, this_pp, n);
642 #if 0
643 if (n == 1) {
644 MPN_REDC_0 (rp[0], tp[1], tp[0], mp[0], -mip[0]);
645 } else
646 #endif
647 #if WANT_REDC_2
648 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
649 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
650 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
651 MPN_REDC_2 (rp, tp, mp, n, mip);
652 #else
653 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
654 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
655 #endif
656 else
657 mpn_redc_n (rp, tp, mp, n, mip);
658
659 /* Precompute odd powers of b and put them in the temporary area at pp. */
660 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
661 #if 1
662 if (n == 1) {
663 umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
664 ++this_pp ;
665 MPN_REDC_0 (*this_pp, tp[1], tp[0], *mp, -mip[0]);
666 } else
667 #endif
668 {
669 mpn_mul_n (tp, this_pp, rp, n);
670 this_pp += n;
671 #if WANT_REDC_2
672 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
673 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
674 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
675 MPN_REDC_2 (this_pp, tp, mp, n, mip);
676 #else
677 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
678 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
679 #endif
680 else
681 mpn_redc_n (this_pp, tp, mp, n, mip);
682 }
683
684 expbits = getbits (ep, ebi, windowsize);
685 ebi -= windowsize;
686
687 /* THINK: Should we initialise the case expbits % 4 == 0 with a mul? */
688 count_trailing_zeros (cnt, expbits);
689 ebi += cnt;
690 expbits >>= cnt;
691
692 MPN_COPY (rp, pp + n * (expbits >> 1), n);
693
694 #define INNERLOOP \
695 while (ebi != 0) \
696 { \
697 while (getbit (ep, ebi) == 0) \
698 { \
699 MPN_SQR (tp, rp, n); \
700 MPN_REDUCE (rp, tp, mp, n, mip); \
701 if (--ebi == 0) \
702 goto done; \
703 } \
704 \
705 /* The next bit of the exponent is 1. Now extract the largest \
706 block of bits <= windowsize, and such that the least \
707 significant bit is 1. */ \
708 \
709 expbits = getbits (ep, ebi, windowsize); \
710 this_windowsize = MIN (ebi, windowsize); \
711 \
712 count_trailing_zeros (cnt, expbits); \
713 this_windowsize -= cnt; \
714 ebi -= this_windowsize; \
715 expbits >>= cnt; \
716 \
717 do \
718 { \
719 MPN_SQR (tp, rp, n); \
720 MPN_REDUCE (rp, tp, mp, n, mip); \
721 } \
722 while (--this_windowsize != 0); \
723 \
724 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
725 MPN_REDUCE (rp, tp, mp, n, mip); \
726 }
727
728
729 if (n == 1)
730 {
731 #undef MPN_MUL_N
732 #undef MPN_SQR
733 #undef MPN_REDUCE
734 #define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b))
735 #define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a))
736 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(*(rp), (tp)[1], (tp)[0], *(mp), - *(mip))
737 INNERLOOP;
738 }
739 else
740 #if WANT_REDC_2
741 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
742 {
743 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
744 {
745 if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
746 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
747 {
748 #undef MPN_MUL_N
749 #undef MPN_SQR
750 #undef MPN_REDUCE
751 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
752 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
753 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
754 INNERLOOP;
755 }
756 else
757 {
758 #undef MPN_MUL_N
759 #undef MPN_SQR
760 #undef MPN_REDUCE
761 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
762 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
763 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
764 INNERLOOP;
765 }
766 }
767 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
768 {
769 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
770 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
771 {
772 #undef MPN_MUL_N
773 #undef MPN_SQR
774 #undef MPN_REDUCE
775 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
776 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
777 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
778 INNERLOOP;
779 }
780 else
781 {
782 #undef MPN_MUL_N
783 #undef MPN_SQR
784 #undef MPN_REDUCE
785 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
786 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
787 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
788 INNERLOOP;
789 }
790 }
791 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
792 {
793 #undef MPN_MUL_N
794 #undef MPN_SQR
795 #undef MPN_REDUCE
796 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
797 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
798 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
799 INNERLOOP;
800 }
801 else
802 {
803 #undef MPN_MUL_N
804 #undef MPN_SQR
805 #undef MPN_REDUCE
806 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
807 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
808 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
809 INNERLOOP;
810 }
811 }
812 else
813 {
814 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
815 {
816 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
817 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
818 {
819 #undef MPN_MUL_N
820 #undef MPN_SQR
821 #undef MPN_REDUCE
822 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
823 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
824 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
825 INNERLOOP;
826 }
827 else
828 {
829 #undef MPN_MUL_N
830 #undef MPN_SQR
831 #undef MPN_REDUCE
832 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
833 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
834 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
835 INNERLOOP;
836 }
837 }
838 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
839 {
840 #undef MPN_MUL_N
841 #undef MPN_SQR
842 #undef MPN_REDUCE
843 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
844 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
845 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
846 INNERLOOP;
847 }
848 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
849 {
850 #undef MPN_MUL_N
851 #undef MPN_SQR
852 #undef MPN_REDUCE
853 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
854 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
855 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
856 INNERLOOP;
857 }
858 else
859 {
860 #undef MPN_MUL_N
861 #undef MPN_SQR
862 #undef MPN_REDUCE
863 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
864 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
865 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
866 INNERLOOP;
867 }
868 }
869
870 #else /* WANT_REDC_2 */
871
872 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
873 {
874 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
875 {
876 if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
877 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
878 {
879 #undef MPN_MUL_N
880 #undef MPN_SQR
881 #undef MPN_REDUCE
882 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
883 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
884 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
885 INNERLOOP;
886 }
887 else
888 {
889 #undef MPN_MUL_N
890 #undef MPN_SQR
891 #undef MPN_REDUCE
892 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
893 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
894 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
895 INNERLOOP;
896 }
897 }
898 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
899 {
900 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
901 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
902 {
903 #undef MPN_MUL_N
904 #undef MPN_SQR
905 #undef MPN_REDUCE
906 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
907 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
908 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
909 INNERLOOP;
910 }
911 else
912 {
913 #undef MPN_MUL_N
914 #undef MPN_SQR
915 #undef MPN_REDUCE
916 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
917 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
918 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
919 INNERLOOP;
920 }
921 }
922 else
923 {
924 #undef MPN_MUL_N
925 #undef MPN_SQR
926 #undef MPN_REDUCE
927 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
928 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
929 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
930 INNERLOOP;
931 }
932 }
933 else
934 {
935 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
936 {
937 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
938 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
939 {
940 #undef MPN_MUL_N
941 #undef MPN_SQR
942 #undef MPN_REDUCE
943 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
944 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
945 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
946 INNERLOOP;
947 }
948 else
949 {
950 #undef MPN_MUL_N
951 #undef MPN_SQR
952 #undef MPN_REDUCE
953 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
954 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
955 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
956 INNERLOOP;
957 }
958 }
959 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
960 {
961 #undef MPN_MUL_N
962 #undef MPN_SQR
963 #undef MPN_REDUCE
964 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
965 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
966 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
967 INNERLOOP;
968 }
969 else
970 {
971 #undef MPN_MUL_N
972 #undef MPN_SQR
973 #undef MPN_REDUCE
974 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
975 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
976 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
977 INNERLOOP;
978 }
979 }
980 #endif /* WANT_REDC_2 */
981
982 done:
983
984 MPN_COPY (tp, rp, n);
985 MPN_ZERO (tp + n, n);
986
987 #if WANT_REDC_2
988 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
989 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
990 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
991 MPN_REDC_2 (rp, tp, mp, n, mip);
992 #else
993 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
994 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
995 #endif
996 else
997 mpn_redc_n (rp, tp, mp, n, mip);
998
999 if (mpn_cmp (rp, mp, n) >= 0)
1000 mpn_sub_n (rp, rp, mp, n);
1001
1002 TMP_FREE;
1003 }