1 /* Mulptiplication mod B^n+1, for small operands.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2020-2022 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp-impl.h"
38
39 #ifndef MOD_BKNP1_USE11
40 #define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
41 #endif
42 #ifndef MOD_BKNP1_ONLY3
43 #define MOD_BKNP1_ONLY3 0
44 #endif
45
46 /* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */
47 static void
48 _mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
49 {
50 mp_limb_t hl;
51 mp_srcptr hp;
52 unsigned i;
53
54 #if MOD_BKNP1_ONLY3
55 ASSERT (k == 3);
56 k = 3;
57 #endif
58 ASSERT (k > 2);
59 ASSERT (k % 2 == 1);
60
61 --k;
62
63 rp += k * n;
64 op += k * n;
65 hp = op;
66 hl = hp[n]; /* initial op[k*n]. */
67 ASSERT (hl < GMP_NUMB_MAX - 1);
68
69 #if MOD_BKNP1_ONLY3 == 0
70 /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
71 rp[n] = cy; */
72 *rp = 0;
73 #endif
74
75 i = k >> 1;
76 do
77 {
78 mp_limb_t cy, bw;
79 rp -= n;
80 op -= n;
81 cy = hl + mpn_add_n (rp, op, hp, n);
82 #if MOD_BKNP1_ONLY3
83 rp[n] = cy;
84 #else
85 MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
86 #endif
87 rp -= n;
88 op -= n;
89 bw = hl + mpn_sub_n (rp, op, hp, n);
90 MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
91 }
92 while (--i != 0);
93
94 for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */
95 {
96 *rp = 0;
97 i = k >> 1;
98 do
99 {
100 rp -= n;
101 MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
102 rp -= n;
103 MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
104 }
105 while (--i != 0);
106 }
107 }
108
109 static void
110 _mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
111 {
112 ASSERT (r[n] == h);
113
114 /* Fully normalise */
115 MPN_DECR_U (r, n + 1, h);
116 h -= r[n];
117 r[n] = 0;
118 MPN_INCR_U (r, n + 1, h);
119 }
120
121 static void
122 _mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
123 {
124 r[n] = 0;
125 MPN_INCR_U (r, n + 1, -h);
126 if (UNLIKELY (r[n] != 0))
127 _mpn_modbnp1_pn_ip (r, n, 1);
128 }
129
130 static void
131 _mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
132 {
133 if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
134 {
135 _mpn_modbnp1_neg_ip (r, n, h);
136 }
137 else
138 {
139 r[n] = h;
140 if (h)
141 _mpn_modbnp1_pn_ip(r, n, h);
142 }
143 }
144
145 /* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
146 /* Used when rn < on < 2*rn. */
147 static void
148 _mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
149 {
150 mp_limb_t bw;
151
152 #if 0
153 if (UNLIKELY (on <= rn))
154 {
155 MPN_COPY (rp, op, on);
156 MPN_ZERO (rp + on, rn - on);
157 return;
158 }
159 #endif
160
161 ASSERT (on > rn);
162 ASSERT (on <= 2 * rn);
163
164 bw = mpn_sub (rp, op, rn, op + rn, on - rn);
165 rp[rn] = 0;
166 MPN_INCR_U (rp, rn + 1, bw);
167 }
168
169 /* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
170 /* With odd k >= 3. */
171 static void
172 _mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
173 {
174 mp_limb_t cy;
175
176 #if MOD_BKNP1_ONLY3
177 ASSERT (k == 3);
178 k = 3;
179 #endif
180 ASSERT (k & 1);
181 k >>= 1;
182 ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);
183 ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
184
185 cy = - mpn_sub_n (rp, op, op + rn, rn);
186 for (;;) {
187 op += 2 * rn;
188 cy += mpn_add_n (rp, rp, op, rn);
189 if (--k == 0)
190 break;
191 cy -= mpn_sub_n (rp, rp, op + rn, rn);
192 };
193
194 cy += op[rn];
195 _mpn_modbnp1_nc_ip (rp, rn, cy);
196 }
197
198 /* For the various mpn_divexact_byN here, fall back to using either
199 mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is
200 faster if it is native. For now, since mpn_divexact_1 is native on
201 platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
202 mpn_pi1_bdiv_q_1 unconditionally. FIXME. */
203
204 #ifndef mpn_divexact_by5
205 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
206 #define BINVERT_5 \
207 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
208 #define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
209 #else
210 #define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
211 #endif
212 #endif
213
214 #ifndef mpn_divexact_by7
215 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
216 #define BINVERT_7 \
217 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
218 #define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
219 #else
220 #define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
221 #endif
222 #endif
223
224 #ifndef mpn_divexact_by11
225 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
226 #define BINVERT_11 \
227 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
228 #define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
229 #else
230 #define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
231 #endif
232 #endif
233
234 #ifndef mpn_divexact_by13
235 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
236 #define BINVERT_13 \
237 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
238 #define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
239 #else
240 #define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
241 #endif
242 #endif
243
244 #ifndef mpn_divexact_by17
245 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
246 #define BINVERT_17 \
247 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
248 #define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
249 #else
250 #define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
251 #endif
252 #endif
253
254 /* Thanks to Chinese remainder theorem, store
255 in {rp, k*n+1} the value mod (B^(k*n)+1), given
256 {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
257 {bp, n+1} mod (B^n+1) .
258 {tp, n+1} is a scratch area.
259 tp == rp or rp == ap are possible.
260 */
261 static void
262 _mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
263 mp_size_t n, unsigned k, mp_ptr tp)
264 {
265 mp_limb_t mod;
266 unsigned i;
267
268 #if MOD_BKNP1_ONLY3
269 ASSERT (k == 3);
270 k = 3;
271 #endif
272 _mpn_modbnp1_kn (tp, ap, n, k);
273 if (mpn_sub_n (tp, bp, tp, n + 1))
274 _mpn_modbnp1_neg_ip (tp, n, tp[n]);
275
276 #if MOD_BKNP1_USE11
277 if (UNLIKELY (k == 11))
278 {
279 ASSERT (GMP_NUMB_BITS % 2 == 0);
280 /* mod <- -Mod(B^n+1,11)^-1 */
281 mod = n * (GMP_NUMB_BITS % 5) % 5;
282 if ((mod > 2) || UNLIKELY (mod == 0))
283 mod += 5;
284
285 mod *= mpn_mod_1 (tp, n + 1, 11);
286 }
287 else
288 #endif
289 {
290 #if GMP_NUMB_BITS % 8 == 0
291 /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1) */
292 /* (2^6 - 1) = 3^2 * 7 */
293 mod = mpn_mod_34lsub1 (tp, n + 1);
294 ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
295 /* (2^12 - 1) = 3^2 * 5 * 7 * 13 */
296 /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241 */
297 ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
298
299 #if GMP_NUMB_BITS % 3 != 0
300 if (UNLIKELY (k != 3))
301 {
302 ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
303 if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
304 mod <<= 1; /* k >> 1 = 1 << 1 */
305 else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
306 mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
307 else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
308 mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
309 else /* k == 17 */
310 mod <<= 3; /* k >> 1 = 1 << 3 */
311 #if 0
312 if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
313 (GMP_NUMB_BITS == 16) && (k == 13))
314 mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
315 (mod >> (3 * GMP_NUMB_BITS >> 2)));
316 #endif
317 }
318 #else
319 ASSERT (GMP_NUMB_MAX % k == 0);
320 /* 2^{GMP_NUMB_BITS} - 1 = 0 (mod k) */
321 /* 2^{GMP_NUMB_BITS} = 1 (mod k) */
322 /* 2^{n*GMP_NUMB_BITS} + 1 = 2 (mod k) */
323 /* -2^{-1} = k >> 1 (mod k) */
324 mod *= k >> 1;
325 #endif
326 #else
327 ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
328 #endif
329 }
330
331 MPN_INCR_U (tp, n + 1, mod);
332 tp[n] += mod;
333
334 if (LIKELY (k == 3))
335 ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
336 else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
337 mpn_divexact_by5 (tp, tp, n + 1);
338 else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))
339 || LIKELY (k == 7))
340 mpn_divexact_by7 (tp, tp, n + 1);
341 #if MOD_BKNP1_USE11
342 else if (k == 11)
343 mpn_divexact_by11 (tp, tp, n + 1);
344 #endif
345 else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
346 mpn_divexact_by13 (tp, tp, n + 1);
347 else /* (k == 17) */
348 mpn_divexact_by17 (tp, tp, n + 1);
349
350 rp += k * n;
351 ap += k * n; /* tp - 1 */
352
353 rp -= n;
354 ap -= n;
355 ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
356
357 i = k >> 1;
358 do
359 {
360 mp_limb_t cy, bw;
361 rp -= n;
362 ap -= n;
363 bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
364 MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
365 rp -= n;
366 ap -= n;
367 cy = mpn_add_n (rp, ap, tp, n) + tp[n];
368 MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
369 }
370 while (--i != 0);
371
372 /* if (LIKELY (rp[k * n])) */
373 _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
374 }
375
376
377 static void
378 _mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
379 mp_ptr tp)
380 {
381 mp_limb_t cy;
382 unsigned k;
383
384 ASSERT (0 < rn);
385 ASSERT ((ap[rn] | bp[rn]) <= 1);
386
387 if (UNLIKELY (ap[rn] | bp[rn]))
388 {
389 if (ap[rn])
390 cy = bp[rn] + mpn_neg (rp, bp, rn);
391 else /* ap[rn] == 0 */
392 cy = mpn_neg (rp, ap, rn);
393 }
394 else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
395 {
396 rn /= k;
397 mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
398 return;
399 }
400 else
401 {
402 mpn_mul_n (tp, ap, bp, rn);
403 cy = mpn_sub_n (rp, tp, tp + rn, rn);
404 }
405 rp[rn] = 0;
406 MPN_INCR_U (rp, rn + 1, cy);
407 }
408
409 /* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
410 /* tp must point to at least 4*(k-1)*n+1 limbs*/
411 void
412 mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
413 mp_size_t n, unsigned k, mp_ptr tp)
414 {
415 mp_ptr hp;
416
417 #if MOD_BKNP1_ONLY3
418 ASSERT (k == 3);
419 k = 3;
420 #endif
421 ASSERT (k > 2);
422 ASSERT (k % 2 == 1);
423
424 /* a % (B^{nn}+1)/(B^{nn/k}+1) */
425 _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
426 /* b % (B^{nn}+1)/(B^{nn/k}+1) */
427 _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
428 mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
429 _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
430
431 hp = tp + k * n + 1;
432 /* a % (B^{nn/k}+1) */
433 ASSERT (ap[k * n] <= 1);
434 _mpn_modbnp1_kn (hp, ap, n, k);
435 /* b % (B^{nn/k}+1) */
436 ASSERT (bp[k * n] <= 1);
437 _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
438 _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
439
440 _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
441 }
442
443
444 static void
445 _mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
446 mp_ptr tp)
447 {
448 mp_limb_t cy;
449 unsigned k;
450
451 ASSERT (0 < rn);
452
453 if (UNLIKELY (ap[rn]))
454 {
455 ASSERT (ap[rn] == 1);
456 *rp = 1;
457 MPN_FILL (rp + 1, rn, 0);
458 return;
459 }
460 else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
461 {
462 rn /= k;
463 mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
464 return;
465 }
466 else
467 {
468 mpn_sqr (tp, ap, rn);
469 cy = mpn_sub_n (rp, tp, tp + rn, rn);
470 }
471 rp[rn] = 0;
472 MPN_INCR_U (rp, rn + 1, cy);
473 }
474
475 /* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
476 /* tp must point to at least 3*(k-1)*n+1 limbs*/
477 void
478 mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
479 mp_size_t n, unsigned k, mp_ptr tp)
480 {
481 mp_ptr hp;
482
483 #if MOD_BKNP1_ONLY3
484 ASSERT (k == 3);
485 k = 3;
486 #endif
487 ASSERT (k > 2);
488 ASSERT (k % 2 == 1);
489
490 /* a % (B^{nn}+1)/(B^{nn/k}+1) */
491 _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
492 mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
493 _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
494
495 hp = tp + k * n + 1;
496 /* a % (B^{nn/k}+1) */
497 ASSERT (ap[k * n] <= 1);
498 _mpn_modbnp1_kn (hp, ap, n, k);
499 _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
500
501 _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
502 }