1 /* mpn_compute_powtab.
2
3 Contributed to the GNU project by Torbjorn Granlund.
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 1991-2017 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 /*
38 CAVEATS:
39 * The exptab and powtab vectors are in opposite orders. Probably OK.
40 * Consider getting rid of exptab, doing bit ops on the un argument instead.
41 * Consider rounding greatest power slightly upwards to save adjustments.
42 * In powtab_decide, consider computing cost from just the 2-3 largest
43 operands, since smaller operand contribute little. This makes most sense
44 if exptab is suppressed.
45 */
46
47 #include "gmp-impl.h"
48
49 #ifndef DIV_1_VS_MUL_1_PERCENT
50 #define DIV_1_VS_MUL_1_PERCENT 150
51 #endif
52
53 #define SET_powers_t(dest, ptr, size, dib, b, sh) \
54 do { \
55 dest.p = ptr; \
56 dest.n = size; \
57 dest.digits_in_base = dib; \
58 dest.base = b; \
59 dest.shift = sh; \
60 } while (0)
61
62 #if DIV_1_VS_MUL_1_PERCENT > 120
63 #define HAVE_mpn_compute_powtab_mul 1
64 static void
65 mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
66 int base, const size_t *exptab, size_t n_pows)
67 {
68 mp_size_t n;
69 mp_ptr p, t;
70 mp_limb_t cy;
71 long start_idx;
72 int c;
73
74 mp_limb_t big_base = mp_bases[base].big_base;
75 int chars_per_limb = mp_bases[base].chars_per_limb;
76
77 mp_ptr powtab_mem_ptr = powtab_mem;
78
79 size_t digits_in_base = chars_per_limb;
80
81 powers_t *pt = powtab;
82
83 p = powtab_mem_ptr;
84 powtab_mem_ptr += 1;
85 p[0] = big_base;
86
87 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
88 pt++;
89
90 t = powtab_mem_ptr;
91 powtab_mem_ptr += 2;
92 t[1] = mpn_mul_1 (t, p, 1, big_base);
93 n = 2;
94
95 digits_in_base *= 2;
96
97 c = t[0] == 0;
98 t += c;
99 n -= c;
100 mp_size_t shift = c;
101
102 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
103 p = t;
104 pt++;
105
106 if (exptab[0] == ((size_t) chars_per_limb << n_pows))
107 {
108 start_idx = n_pows - 2;
109 }
110 else
111 {
112 if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
113 {
114 /* 3, sometimes adjusted to 4. */
115 t = powtab_mem_ptr;
116 powtab_mem_ptr += 4;
117 t[n] = cy = mpn_mul_1 (t, p, n, big_base);
118 n += cy != 0;;
119
120 digits_in_base += chars_per_limb;
121
122 c = t[0] == 0;
123 t += c;
124 n -= c;
125 shift += c;
126 }
127 else
128 {
129 /* 2 copy, will always become 3 with back-multiplication. */
130 t = powtab_mem_ptr;
131 powtab_mem_ptr += 3;
132 t[0] = p[0];
133 t[1] = p[1];
134 }
135
136 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
137 p = t;
138 pt++;
139 start_idx = n_pows - 3;
140 }
141
142 for (long pi = start_idx; pi >= 0; pi--)
143 {
144 t = powtab_mem_ptr;
145 powtab_mem_ptr += 2 * n + 2;
146
147 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
148
149 mpn_sqr (t, p, n);
150
151 digits_in_base *= 2;
152 n *= 2;
153 n -= t[n - 1] == 0;
154 shift *= 2;
155
156 c = t[0] == 0;
157 t += c;
158 n -= c;
159 shift += c;
160
161 /* Adjust new value if it is too small as input to the next squaring. */
162 if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
163 {
164 t[n] = cy = mpn_mul_1 (t, t, n, big_base);
165 n += cy != 0;
166
167 digits_in_base += chars_per_limb;
168
169 c = t[0] == 0;
170 t += c;
171 n -= c;
172 shift += c;
173 }
174
175 SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
176
177 /* Adjust previous value if it is not at its target power. */
178 if (pt[-1].digits_in_base < exptab[pi + 1])
179 {
180 mp_size_t n = pt[-1].n;
181 mp_ptr p = pt[-1].p;
182 p[n] = cy = mpn_mul_1 (p, p, n, big_base);
183 n += cy != 0;
184
185 ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
186 pt[-1].digits_in_base = exptab[pi + 1];
187
188 c = p[0] == 0;
189 pt[-1].p = p + c;
190 pt[-1].n = n - c;
191 pt[-1].shift += c;
192 }
193
194 p = t;
195 pt++;
196 }
197 }
198 #endif
199
200 #if DIV_1_VS_MUL_1_PERCENT < 275
201 #define HAVE_mpn_compute_powtab_div 1
202 static void
203 mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
204 int base, const size_t *exptab, size_t n_pows)
205 {
206 mp_ptr p, t;
207
208 mp_limb_t big_base = mp_bases[base].big_base;
209 int chars_per_limb = mp_bases[base].chars_per_limb;
210
211 mp_ptr powtab_mem_ptr = powtab_mem;
212
213 size_t digits_in_base = chars_per_limb;
214
215 powers_t *pt = powtab;
216
217 p = powtab_mem_ptr;
218 powtab_mem_ptr += 1;
219 p[0] = big_base;
220
221 SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
222 pt++;
223
224 mp_size_t n = 1;
225 mp_size_t shift = 0;
226 for (long pi = n_pows - 1; pi >= 0; pi--)
227 {
228 t = powtab_mem_ptr;
229 powtab_mem_ptr += 2 * n;
230
231 ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
232
233 mpn_sqr (t, p, n);
234 n = 2 * n - 1; n += t[n] != 0;
235 digits_in_base *= 2;
236
237 if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */
238 {
239 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
240 if (__GMP_LIKELY (base == 10))
241 mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
242 MP_BASES_BIG_BASE_BINVERTED_10,
243 MP_BASES_BIG_BASE_CTZ_10);
244 else
245 #endif
246 /* FIXME: We could use _pi1 here if we add big_base_binverted and
247 big_base_ctz fields to struct bases. That would add about 2 KiB
248 to mp_bases.c.
249 FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
250 mpn_bdiv_q_1 for more machines. */
251 mpn_divexact_1 (t, t, n, big_base);
252
253 n -= t[n - 1] == 0;
254 digits_in_base -= chars_per_limb;
255 }
256
257 shift *= 2;
258 /* Strip low zero limbs, but be careful to keep the result divisible by
259 big_base. */
260 while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
261 {
262 t++;
263 n--;
264 shift++;
265 }
266 p = t;
267
268 SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
269 pt++;
270 }
271
272 /* Strip any remaining low zero limbs. */
273 pt -= n_pows + 1;
274 for (long pi = n_pows; pi >= 0; pi--)
275 {
276 mp_ptr t = pt[pi].p;
277 mp_size_t shift = pt[pi].shift;
278 mp_size_t n = pt[pi].n;
279 int c;
280 c = t[0] == 0;
281 t += c;
282 n -= c;
283 shift += c;
284 pt[pi].p = t;
285 pt[pi].shift = shift;
286 pt[pi].n = n;
287 }
288 }
289 #endif
290
291 static long
292 powtab_decide (size_t *exptab, size_t un, int base)
293 {
294 int chars_per_limb = mp_bases[base].chars_per_limb;
295 long n_pows = 0;
296 for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
297 {
298 exptab[n_pows] = pn * chars_per_limb;
299 n_pows++;
300 }
301 exptab[n_pows] = chars_per_limb;
302
303 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
304 size_t pn = un - 1;
305 size_t xn = (un + 1) >> 1;
306 unsigned mcost = 1;
307 unsigned dcost = 1;
308 for (long i = n_pows - 2; i >= 0; i--)
309 {
310 size_t pow = (pn >> (i + 1)) + 1;
311
312 if (pow & 1)
313 dcost += pow;
314
315 if (xn != (pow << i))
316 {
317 if (pow > 2 && (pow & 1) == 0)
318 mcost += 2 * pow;
319 else
320 mcost += pow;
321 }
322 else
323 {
324 if (pow & 1)
325 mcost += pow;
326 }
327 }
328
329 dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
330
331 if (mcost <= dcost)
332 return n_pows;
333 else
334 return -n_pows;
335 #elif HAVE_mpn_compute_powtab_mul
336 return n_pows;
337 #elif HAVE_mpn_compute_powtab_div
338 return -n_pows;
339 #else
340 #error "no powtab function available"
341 #endif
342 }
343
344 size_t
345 mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
346 {
347 size_t exptab[GMP_LIMB_BITS];
348
349 long n_pows = powtab_decide (exptab, un, base);
350
351 #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
352 if (n_pows >= 0)
353 {
354 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
355 return n_pows;
356 }
357 else
358 {
359 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
360 return -n_pows;
361 }
362 #elif HAVE_mpn_compute_powtab_mul
363 ASSERT (n_pows > 0);
364 mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
365 return n_pows;
366 #elif HAVE_mpn_compute_powtab_div
367 ASSERT (n_pows < 0);
368 mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
369 return -n_pows;
370 #else
371 #error "no powtab function available"
372 #endif
373 }