1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10 Copyright 2010-2012, 2015-2017, 2020, 2021 Free Software Foundation, Inc.
11
12 This file is part of the GNU MP Library.
13
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
20
21 or
22
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
26
27 or both in parallel, as here.
28
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
33
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
37
38 #include "gmp-impl.h"
39 #include "longlong.h"
40
41 /* TODO:
42 - split this file in smaller parts with functions that can be recycled for different computations.
43 */
44
45 /**************************************************************/
46 /* Section macros: common macros, for mswing/fac/bin (&sieve) */
47 /**************************************************************/
48
49 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
50 if ((PR) > (MAX_PR)) { \
51 (VEC)[(I)++] = (PR); \
52 (PR) = 1; \
53 }
54
55 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
56 do { \
57 if ((PR) > (MAX_PR)) { \
58 (VEC)[(I)++] = (PR); \
59 (PR) = (P); \
60 } else \
61 (PR) *= (P); \
62 } while (0)
63
64 #define LOOP_ON_SIEVE_CONTINUE(prime,end) \
65 __max_i = (end); \
66 \
67 do { \
68 ++__i; \
69 if ((*__sieve & __mask) == 0) \
70 { \
71 mp_limb_t prime; \
72 prime = id_to_n(__i)
73
74 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
75 do { \
76 mp_limb_t __mask, *__sieve, __max_i, __i; \
77 \
78 __i = (start)-(off); \
79 __sieve = (sieve) + __i / GMP_LIMB_BITS; \
80 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
81 __i += (off); \
82 \
83 LOOP_ON_SIEVE_CONTINUE(prime,end)
84
85 #define LOOP_ON_SIEVE_STOP \
86 } \
87 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
88 __sieve += __mask & 1; \
89 } while (__i <= __max_i)
90
91 #define LOOP_ON_SIEVE_END \
92 LOOP_ON_SIEVE_STOP; \
93 } while (0)
94
95 /*********************************************************/
96 /* Section sieve: sieving functions and tools for primes */
97 /*********************************************************/
98
99 #if WANT_ASSERT
100 static mp_limb_t
101 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
102 #endif
103
104 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
105 static mp_limb_t
106 id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
107
108 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
109 static mp_limb_t
110 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
111
112 #if WANT_ASSERT
113 static mp_size_t
114 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
115 #endif
116
117 /*********************************************************/
118 /* Section mswing: 2-multiswing factorial */
119 /*********************************************************/
120
121 /* Returns an approximation of the sqare root of x.
122 * It gives:
123 * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
124 * or
125 * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
126 */
127 static mp_limb_t
128 limb_apprsqrt (mp_limb_t x)
129 {
130 int s;
131
132 ASSERT (x > 2);
133 count_leading_zeros (s, x);
134 s = (GMP_LIMB_BITS - s) >> 1;
135 return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
136 }
137
138 #if 0
139 /* A count-then-exponentiate variant for SWING_A_PRIME */
140 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
141 do { \
142 mp_limb_t __q, __prime; \
143 int __exp; \
144 __prime = (P); \
145 __exp = 0; \
146 __q = (N); \
147 do { \
148 __q /= __prime; \
149 __exp += __q & 1; \
150 } while (__q >= __prime); \
151 if (__exp) { /* Store $prime^{exp}$ */ \
152 for (__q = __prime; --__exp; __q *= __prime); \
153 FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
154 }; \
155 } while (0)
156 #else
157 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
158 do { \
159 mp_limb_t __q, __prime; \
160 __prime = (P); \
161 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
162 __q = (N); \
163 do { \
164 __q /= __prime; \
165 if ((__q & 1) != 0) (PR) *= __prime; \
166 } while (__q >= __prime); \
167 } while (0)
168 #endif
169
170 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
171 do { \
172 mp_limb_t __prime; \
173 __prime = (P); \
174 if ((((N) / __prime) & 1) != 0) \
175 FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
176 } while (0)
177
178 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
179 factorial of the parameter n. The result x is an odd positive
180 integer so that multiswing(n,2) = x 2^a.
181
182 Uses the algorithm described by Peter Luschny in "Divide, Swing and
183 Conquer the Factorial!".
184
185 The pointer sieve points to primesieve_size(n) limbs containing a
186 bit-array where primes are marked as 0.
187 Enough (FIXME: explain :-) limbs must be pointed by factors.
188 */
189
190 static void
191 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
192 {
193 mp_limb_t prod, max_prod;
194 mp_size_t j;
195
196 ASSERT (n > 25);
197
198 j = 0;
199 prod = -(n & 1);
200 n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
201
202 prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
203 max_prod = GMP_NUMB_MAX / (n-1);
204
205 /* Handle prime = 3 separately. */
206 SWING_A_PRIME (3, n, prod, max_prod, factors, j);
207
208 /* Swing primes from 5 to n/3 */
209 {
210 mp_limb_t s, l_max_prod;
211
212 s = limb_apprsqrt(n);
213 ASSERT (s >= 5);
214 s = n_to_bit (s);
215 ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
216 ASSERT (s < n_to_bit (n / 3));
217 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
218 SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
219 LOOP_ON_SIEVE_STOP;
220
221 ASSERT (max_prod <= GMP_NUMB_MAX / 3);
222
223 l_max_prod = max_prod * 3;
224
225 LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3));
226 SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
227 LOOP_ON_SIEVE_END;
228 }
229
230 /* Store primes from (n+1)/2 to n */
231 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
232 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
233 LOOP_ON_SIEVE_END;
234
235 if (LIKELY (j != 0))
236 {
237 factors[j++] = prod;
238 mpz_prodlimbs (x, factors, j);
239 }
240 else
241 {
242 ASSERT (ALLOC (x) > 0);
243 PTR (x)[0] = prod;
244 SIZ (x) = 1;
245 }
246 }
247
248 #undef SWING_A_PRIME
249 #undef SH_SWING_A_PRIME
250 #undef LOOP_ON_SIEVE_END
251 #undef LOOP_ON_SIEVE_STOP
252 #undef LOOP_ON_SIEVE_BEGIN
253 #undef LOOP_ON_SIEVE_CONTINUE
254 #undef FACTOR_LIST_APPEND
255
256 /*********************************************************/
257 /* Section oddfac: odd factorial, needed also by binomial*/
258 /*********************************************************/
259
260 /* FIXME: refine che following estimate. */
261
262 #if TUNE_PROGRAM_BUILD
263 #define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD_LIMIT*FAC_DSC_THRESHOLD_LIMIT-1)+1) - 1)
264 #else
265 #define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD*FAC_DSC_THRESHOLD-1)+1) - 1)
266 #endif
267
268 /* mpz_oddfac_1 computes the odd part of the factorial of the
269 parameter n. I.e. n! = x 2^a, where x is the returned value: an
270 odd positive integer.
271
272 If flag != 0 a square is skipped in the DSC part, e.g.
273 if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
274
275 If n is too small, flag is ignored, and an ASSERT can be triggered.
276
277 TODO: FAC_DSC_THRESHOLD is used here with two different roles:
278 - to decide when prime factorisation is needed,
279 - to stop the recursion, once sieving is done.
280 Maybe two thresholds can do a better job.
281 */
282 void
283 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
284 {
285 ASSERT (n <= GMP_NUMB_MAX);
286 ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
287
288 if (n <= ODD_FACTORIAL_TABLE_LIMIT)
289 {
290 MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
291 SIZ (x) = 1;
292 }
293 else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
294 {
295 mp_ptr px;
296
297 px = MPZ_NEWALLOC (x, 2);
298 umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
299 SIZ (x) = 2;
300 }
301 else
302 {
303 unsigned s;
304 mp_ptr factors;
305
306 s = 0;
307 {
308 mp_limb_t tn;
309 mp_limb_t prod, max_prod;
310 mp_size_t j;
311 TMP_SDECL;
312
313 #if TUNE_PROGRAM_BUILD
314 ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
315 ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
316 #endif
317
318 /* Compute the number of recursive steps for the DSC algorithm. */
319 for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
320 tn >>= 1;
321
322 j = 0;
323
324 TMP_SMARK;
325 factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
326 ASSERT (tn >= FACTORS_PER_LIMB);
327
328 prod = 1;
329 #if TUNE_PROGRAM_BUILD
330 max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
331 #else
332 max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD * FAC_DSC_THRESHOLD);
333 #endif
334
335 ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
336 do {
337 factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
338 mp_limb_t diff = (tn - ODD_DOUBLEFACTORIAL_TABLE_LIMIT) & -CNST_LIMB (2);
339 if ((diff & 2) != 0)
340 {
341 FACTOR_LIST_STORE (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff, prod, max_prod, factors, j);
342 diff -= 2;
343 }
344 if (diff != 0)
345 {
346 mp_limb_t fac = (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2) *
347 (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff);
348 do {
349 FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
350 diff -= 4;
351 fac += diff * 2;
352 } while (diff != 0);
353 }
354 max_prod <<= 2;
355 tn >>= 1;
356 } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
357
358 factors[j++] = prod;
359 factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
360 factors[j++] = __gmp_oddfac_table[tn >> 1];
361 mpz_prodlimbs (x, factors, j);
362
363 TMP_SFREE;
364 }
365
366 if (s != 0)
367 /* Use the algorithm described by Peter Luschny in "Divide,
368 Swing and Conquer the Factorial!".
369
370 Improvement: there are two temporary buffers, factors and
371 square, that are never used together; with a good estimate
372 of the maximal needed size, they could share a single
373 allocation.
374 */
375 {
376 mpz_t mswing;
377 mp_ptr sieve;
378 mp_size_t size;
379 TMP_DECL;
380
381 TMP_MARK;
382
383 flag--;
384 size = n / GMP_NUMB_BITS + 4;
385 ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
386 /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
387 one more can be overwritten by mul, another for the sieve */
388 MPZ_TMP_INIT (mswing, size);
389 /* Initialize size, so that ASSERT can check it correctly. */
390 ASSERT_CODE (SIZ (mswing) = 0);
391
392 /* Put the sieve on the second half, it will be overwritten by the last mswing. */
393 sieve = PTR (mswing) + size / 2 + 1;
394
395 size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
396
397 factors = TMP_ALLOC_LIMBS (size);
398 do {
399 mp_ptr square, px;
400 mp_size_t nx, ns;
401 mp_limb_t cy;
402 TMP_DECL;
403
404 s--;
405 ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
406 mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
407
408 TMP_MARK;
409 nx = SIZ (x);
410 if (s == flag) {
411 size = nx;
412 square = TMP_ALLOC_LIMBS (size);
413 MPN_COPY (square, PTR (x), nx);
414 } else {
415 size = nx << 1;
416 square = TMP_ALLOC_LIMBS (size);
417 mpn_sqr (square, PTR (x), nx);
418 size -= (square[size - 1] == 0);
419 }
420 ns = SIZ (mswing);
421 nx = size + ns;
422 px = MPZ_NEWALLOC (x, nx);
423 ASSERT (ns <= size);
424 cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
425
426 SIZ(x) = nx - (cy == 0);
427 TMP_FREE;
428 } while (s != 0);
429 TMP_FREE;
430 }
431 }
432 }
433
434 #undef FACTORS_PER_LIMB
435 #undef FACTOR_LIST_STORE