1 /* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
2
3 Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software
4 Foundation, Inc.
5
6 This file is part of the GNU MP Library.
7
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14
15 or
16
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
20
21 or both in parallel, as here.
22
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
27
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
31
32 #include "gmp-impl.h"
33
34 /* How many special cases? Minimum is 2: 0 and 1;
35 * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
36 */
37 #define APARTAJ_KALKULOJ 2
38
39 /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
40 * the operands fit.
41 */
42 #define UZU_BIN_UIUI 0
43
44 /* Whether to use a shortcut to precompute the product of four
45 * elements (1), or precompute only the product of a couple (0).
46 *
47 * In both cases the precomputed product is then updated with some
48 * linear operations to obtain the product of the next four (1)
49 * [or two (0)] operands.
50 */
51 #define KVAROPE 1
52
53 static void
54 posmpz_init (mpz_ptr r)
55 {
56 mp_ptr rp;
57 ASSERT (SIZ (r) > 0);
58 rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
59 *rp = 0;
60 *++rp = 0;
61 }
62
63 /* Equivalent to mpz_add_ui (r, r, in), but faster when
64 0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
65 static void
66 posmpz_inc_ui (mpz_ptr r, unsigned long in)
67 {
68 #if BITS_PER_ULONG > GMP_NUMB_BITS
69 mpz_add_ui (r, r, in);
70 #else
71 ASSERT (SIZ (r) > 0);
72 MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
73 SIZ (r) += PTR (r)[SIZ (r)];
74 #endif
75 }
76
77 /* Equivalent to mpz_sub_ui (r, r, in), but faster when
78 0 < SIZ (r) and we know in advance that the result is positive. */
79 static void
80 posmpz_dec_ui (mpz_ptr r, unsigned long in)
81 {
82 #if BITS_PER_ULONG > GMP_NUMB_BITS
83 mpz_sub_ui (r, r, in);
84 #else
85 ASSERT (mpz_cmp_ui (r, in) >= 0);
86 MPN_DECR_U (PTR (r), SIZ (r), in);
87 SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
88 #endif
89 }
90
91 /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
92 0 < SIZ (r) and we know in advance that the result is positive. */
93 static void
94 posmpz_rsh1 (mpz_ptr r)
95 {
96 mp_ptr rp;
97 mp_size_t rn;
98
99 rn = SIZ (r);
100 rp = PTR (r);
101 ASSERT (rn > 0);
102 mpn_rshift (rp, rp, rn, 1);
103 SIZ (r) -= rp[rn - 1] == 0;
104 }
105
106 /* Computes r = n(n+(2*k-1))/2
107 It uses a sqare instead of a product, computing
108 r = ((n+k-1)^2 + n - (k-1)^2)/2
109 As a side effect, sets t = n+k-1
110 */
111 static void
112 mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
113 {
114 ASSERT (k > 0 && SIZ(n) > 0);
115 --k;
116 mpz_add_ui (t, n, k);
117 mpz_mul (r, t, t);
118 mpz_add (r, r, n);
119 posmpz_rsh1 (r);
120 if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
121 posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
122 else
123 {
124 mpz_t tmp;
125 mpz_init_set_ui (tmp, (k + (k & 1)));
126 mpz_mul_ui (tmp, tmp, k >> 1);
127 mpz_sub (r, r, tmp);
128 mpz_clear (tmp);
129 }
130 }
131
132 #if KVAROPE
133 static void
134 rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
135 {
136 if (k - lk < 5)
137 {
138 do {
139 posmpz_inc_ui (p, 4*k+2);
140 mpz_addmul_ui (P, p, 4*k);
141 posmpz_dec_ui (P, k);
142 mpz_mul (r, r, P);
143 } while (--k > lk);
144 }
145 else
146 {
147 mpz_t lt;
148 unsigned long int m;
149
150 ALLOC (lt) = 0;
151 SIZ (lt) = 0;
152 if (t == NULL)
153 t = lt;
154 m = ((k + lk) >> 1) + 1;
155 rek_raising_fac4 (r, p, P, k, m, t);
156
157 posmpz_inc_ui (p, 4*m+2);
158 mpz_addmul_ui (P, p, 4*m);
159 posmpz_dec_ui (P, m);
160 mpz_set (t, P);
161 rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
162
163 mpz_mul (r, r, t);
164 mpz_clear (lt);
165 }
166 }
167
168 /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
169 rek_raising_fac4, and exploiting an idea inspired by a piece of
170 code that Fredrik Johansson wrote and by a comment by Niels Möller.
171
172 Assume k = 4i then compute:
173 p = (n+1)(n+4i)/2 - i
174 (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
175 (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
176 P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
177 n' = n + 2
178 i' = i - 1
179 (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
180 (n'-1)(n'+4i'+2)/2 - i' - 1 = p
181 (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
182 (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1
183 (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
184 p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
185 p' - 4i' - 2 = p
186 (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
187 (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
188 (p' - 3i' - 1)*(p' - i')/2 = P
189 (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
190 (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
191 (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
192 (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
193 P' = P + 4i'p' - i'
194
195 And compute the product P * P' * P" ...
196 */
197
198 static void
199 mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
200 {
201 ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
202 posmpz_init (n);
203 posmpz_inc_ui (n, 1);
204 SIZ (r) = 0;
205 if (k & 1)
206 {
207 mpz_set (r, n);
208 posmpz_inc_ui (n, 1);
209 }
210 k >>= 1;
211 if (APARTAJ_KALKULOJ < 2 && k == 0)
212 return;
213
214 mpz_hmul_nbnpk (p, n, k, t);
215 posmpz_init (p);
216
217 if (k & 1)
218 {
219 if (SIZ (r))
220 mpz_mul (r, r, p);
221 else
222 mpz_set (r, p);
223 posmpz_inc_ui (p, k - 1);
224 }
225 k >>= 1;
226 if (APARTAJ_KALKULOJ < 4 && k == 0)
227 return;
228
229 mpz_hmul_nbnpk (t, p, k, n);
230 if (SIZ (r))
231 mpz_mul (r, r, t);
232 else
233 mpz_set (r, t);
234
235 if (APARTAJ_KALKULOJ > 8 || k > 1)
236 {
237 posmpz_dec_ui (p, k);
238 rek_raising_fac4 (r, p, t, k - 1, 0, n);
239 }
240 }
241
242 #else /* KVAROPE */
243
244 static void
245 rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
246 {
247 /* Should the threshold depend on SIZ (n) ? */
248 if (k - lk < 10)
249 {
250 do {
251 posmpz_inc_ui (n, k);
252 mpz_mul (r, r, n);
253 --k;
254 } while (k > lk);
255 }
256 else
257 {
258 mpz_t t3;
259 unsigned long int m;
260
261 m = ((k + lk) >> 1) + 1;
262 rek_raising_fac (r, n, k, m, t1, t2);
263
264 posmpz_inc_ui (n, m);
265 if (t1 == NULL)
266 {
267 mpz_init_set (t3, n);
268 t1 = t3;
269 }
270 else
271 {
272 ALLOC (t3) = 0;
273 mpz_set (t1, n);
274 }
275 rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
276
277 mpz_mul (r, r, t1);
278 mpz_clear (t3);
279 }
280 }
281
282 /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
283 rek_raising_fac, and exploiting an idea inspired by a piece of
284 code that Fredrik Johansson wrote.
285
286 Force an even k = 2i then compute:
287 p = (n+1)(n+2i)/2
288 i' = i - 1
289 p == (n+1)(n+2i'+2)/2
290 p' = p + i' == (n+2)(n+2i'+1)/2
291 n' = n + 1
292 p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
293
294 And compute the product p * p' * p" ...
295 */
296
297 static void
298 mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
299 {
300 unsigned long int hk;
301 ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
302 mpz_add_ui (n, n, 1);
303 hk = k >> 1;
304 mpz_hmul_nbnpk (p, n, hk, t);
305
306 if ((k & 1) != 0)
307 {
308 mpz_add_ui (t, t, hk + 1);
309 mpz_mul (r, t, p);
310 }
311 else
312 {
313 mpz_set (r, p);
314 }
315
316 if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
317 {
318 posmpz_init (p);
319 rek_raising_fac (r, p, hk - 1, 0, t, n);
320 }
321 }
322 #endif /* KVAROPE */
323
324 /* This is a poor implementation. Look at bin_uiui.c for improvement ideas.
325 In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
326 the code here only for big n.
327
328 The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
329 1 section 1.2.6 part G. */
330
331 void
332 mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
333 {
334 mpz_t ni;
335 mp_size_t negate;
336
337 if (SIZ (n) < 0)
338 {
339 /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
340 mpz_init (ni);
341 mpz_add_ui (ni, n, 1L);
342 mpz_neg (ni, ni);
343 negate = (k & 1); /* (-1)^k */
344 }
345 else
346 {
347 /* bin(n,k) == 0 if k>n
348 (no test for this under the n<0 case, since -n+k-1 >= k there) */
349 if (mpz_cmp_ui (n, k) < 0)
350 {
351 SIZ (r) = 0;
352 return;
353 }
354
355 /* set ni = n-k */
356 mpz_init (ni);
357 mpz_sub_ui (ni, n, k);
358 negate = 0;
359 }
360
361 /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
362 for positive, 1 for negative). */
363
364 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
365 whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
366 = ni, and new ni of ni+k-ni = k. */
367 if (mpz_cmp_ui (ni, k) < 0)
368 {
369 unsigned long tmp;
370 tmp = k;
371 k = mpz_get_ui (ni);
372 mpz_set_ui (ni, tmp);
373 }
374
375 if (k < APARTAJ_KALKULOJ)
376 {
377 if (k == 0)
378 {
379 SIZ (r) = 1;
380 MPZ_NEWALLOC (r, 1)[0] = 1;
381 }
382 #if APARTAJ_KALKULOJ > 2
383 else if (k > 1)
384 {
385 mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2));
386 mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */
387 if (APARTAJ_KALKULOJ == 2 || k == 2)
388 {
389 mpz_add (r, r, ni); /* n^2+n= n(n+1) */
390 posmpz_rsh1 (r);
391 }
392 #if APARTAJ_KALKULOJ > 3
393 #if APARTAJ_KALKULOJ != 5
394 #error Not implemented! 3 < APARTAJ_KALKULOJ != 5
395 #endif
396 else /* k > 2 */
397 { /* k = 3, 4 */
398 mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
399 if (k == 3)
400 {
401 mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
402 /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
403 }
404 else /* k = 4 */
405 {
406 mpz_add (ni, ni, r); /* (n+1)^2+n */
407 mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
408 /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */
409 /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */
410 /* because the next division will shift away this bit anyway. */
411 /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
412 }
413 mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1));
414 SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0;
415 }
416 #endif
417 }
418 #endif
419 else
420 { /* k = 1 */
421 mpz_add_ui (r, ni, 1);
422 }
423 }
424 #if UZU_BIN_UIUI
425 else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
426 {
427 mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
428 }
429 #endif
430 else
431 {
432 mp_limb_t count;
433 mpz_t num, den;
434
435 mpz_init (num);
436 mpz_init (den);
437
438 #if KVAROPE
439 mpz_raising_fac4 (num, ni, k, den, r);
440 popc_limb (count, k);
441 ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
442 mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
443 #else
444 mpz_raising_fac (num, ni, k, den, r);
445 popc_limb (count, k);
446 ASSERT (k - (k >> 1) - count >= 0);
447 mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
448 #endif
449
450 mpz_oddfac_1(den, k, 0);
451
452 mpz_divexact(r, num, den);
453 mpz_clear (num);
454 mpz_clear (den);
455 }
456 mpz_clear (ni);
457
458 SIZ(r) = (SIZ(r) ^ -negate) + negate;
459 }