1  /* mpn_compute_powtab.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund.
       4  
       5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 1991-2017 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  /*
      38    CAVEATS:
      39    * The exptab and powtab vectors are in opposite orders.  Probably OK.
      40    * Consider getting rid of exptab, doing bit ops on the un argument instead.
      41    * Consider rounding greatest power slightly upwards to save adjustments.
      42    * In powtab_decide, consider computing cost from just the 2-3 largest
      43      operands, since smaller operand contribute little.  This makes most sense
      44      if exptab is suppressed.
      45  */
      46  
      47  #include "gmp-impl.h"
      48  
      49  #ifndef DIV_1_VS_MUL_1_PERCENT
      50  #define DIV_1_VS_MUL_1_PERCENT 150
      51  #endif
      52  
      53  #define SET_powers_t(dest, ptr, size, dib, b, sh)	\
      54    do {							\
      55      dest.p = ptr;					\
      56      dest.n = size;					\
      57      dest.digits_in_base = dib;				\
      58      dest.base = b;					\
      59      dest.shift = sh;					\
      60    } while (0)
      61  
      62  #if DIV_1_VS_MUL_1_PERCENT > 120
      63  #define HAVE_mpn_compute_powtab_mul 1
      64  static void
      65  mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
      66  			int base, const size_t *exptab, size_t n_pows)
      67  {
      68    mp_size_t n;
      69    mp_ptr p, t;
      70    mp_limb_t cy;
      71    long start_idx;
      72    int c;
      73  
      74    mp_limb_t big_base = mp_bases[base].big_base;
      75    int chars_per_limb = mp_bases[base].chars_per_limb;
      76  
      77    mp_ptr powtab_mem_ptr = powtab_mem;
      78  
      79    size_t digits_in_base = chars_per_limb;
      80  
      81    powers_t *pt = powtab;
      82  
      83    p = powtab_mem_ptr;
      84    powtab_mem_ptr += 1;
      85    p[0] = big_base;
      86  
      87    SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
      88    pt++;
      89  
      90    t = powtab_mem_ptr;
      91    powtab_mem_ptr += 2;
      92    t[1] = mpn_mul_1 (t, p, 1, big_base);
      93    n = 2;
      94  
      95    digits_in_base *= 2;
      96  
      97    c = t[0] == 0;
      98    t += c;
      99    n -= c;
     100    mp_size_t shift = c;
     101  
     102    SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
     103    p = t;
     104    pt++;
     105  
     106    if (exptab[0] == ((size_t) chars_per_limb << n_pows))
     107      {
     108        start_idx = n_pows - 2;
     109      }
     110    else
     111      {
     112        if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0])
     113  	{
     114  	  /* 3, sometimes adjusted to 4.  */
     115  	  t = powtab_mem_ptr;
     116  	  powtab_mem_ptr += 4;
     117  	  t[n] = cy = mpn_mul_1 (t, p, n, big_base);
     118  	  n += cy != 0;;
     119  
     120  	  digits_in_base += chars_per_limb;
     121  
     122  	  c  = t[0] == 0;
     123  	  t += c;
     124  	  n -= c;
     125  	  shift += c;
     126  	}
     127        else
     128  	{
     129  	  /* 2 copy, will always become 3 with back-multiplication.  */
     130  	  t = powtab_mem_ptr;
     131  	  powtab_mem_ptr += 3;
     132  	  t[0] = p[0];
     133  	  t[1] = p[1];
     134  	}
     135  
     136        SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
     137        p = t;
     138        pt++;
     139        start_idx = n_pows - 3;
     140      }
     141  
     142    for (long pi = start_idx; pi >= 0; pi--)
     143      {
     144        t = powtab_mem_ptr;
     145        powtab_mem_ptr += 2 * n + 2;
     146  
     147        ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
     148  
     149        mpn_sqr (t, p, n);
     150  
     151        digits_in_base *= 2;
     152        n *= 2;
     153        n -= t[n - 1] == 0;
     154        shift *= 2;
     155  
     156        c = t[0] == 0;
     157        t += c;
     158        n -= c;
     159        shift += c;
     160  
     161        /* Adjust new value if it is too small as input to the next squaring.  */
     162        if (((digits_in_base + chars_per_limb) << pi) <= exptab[0])
     163  	{
     164  	  t[n] = cy = mpn_mul_1 (t, t, n, big_base);
     165  	  n += cy != 0;
     166  
     167  	  digits_in_base += chars_per_limb;
     168  
     169  	  c  = t[0] == 0;
     170  	  t += c;
     171  	  n -= c;
     172  	  shift += c;
     173  	}
     174  
     175        SET_powers_t (pt[0], t, n, digits_in_base, base, shift);
     176  
     177        /* Adjust previous value if it is not at its target power.  */
     178        if (pt[-1].digits_in_base < exptab[pi + 1])
     179  	{
     180  	  mp_size_t n = pt[-1].n;
     181  	  mp_ptr p = pt[-1].p;
     182  	  p[n] = cy = mpn_mul_1 (p, p, n, big_base);
     183  	  n += cy != 0;
     184  
     185  	  ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]);
     186  	  pt[-1].digits_in_base = exptab[pi + 1];
     187  
     188  	  c = p[0] == 0;
     189  	  pt[-1].p = p + c;
     190  	  pt[-1].n = n - c;
     191  	  pt[-1].shift += c;
     192  	}
     193  
     194        p = t;
     195        pt++;
     196      }
     197  }
     198  #endif
     199  
     200  #if DIV_1_VS_MUL_1_PERCENT < 275
     201  #define HAVE_mpn_compute_powtab_div 1
     202  static void
     203  mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un,
     204  			int base, const size_t *exptab, size_t n_pows)
     205  {
     206    mp_ptr p, t;
     207  
     208    mp_limb_t big_base = mp_bases[base].big_base;
     209    int chars_per_limb = mp_bases[base].chars_per_limb;
     210  
     211    mp_ptr powtab_mem_ptr = powtab_mem;
     212  
     213    size_t digits_in_base = chars_per_limb;
     214  
     215    powers_t *pt = powtab;
     216  
     217    p = powtab_mem_ptr;
     218    powtab_mem_ptr += 1;
     219    p[0] = big_base;
     220  
     221    SET_powers_t (pt[0], p, 1, digits_in_base, base, 0);
     222    pt++;
     223  
     224    mp_size_t n = 1;
     225    mp_size_t shift = 0;
     226    for (long pi = n_pows - 1; pi >= 0; pi--)
     227      {
     228        t = powtab_mem_ptr;
     229        powtab_mem_ptr += 2 * n;
     230  
     231        ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un));
     232  
     233        mpn_sqr (t, p, n);
     234        n = 2 * n - 1; n += t[n] != 0;
     235        digits_in_base *= 2;
     236  
     237        if (digits_in_base != exptab[pi])	/* if ((((un - 1) >> pi) & 2) == 0) */
     238  	{
     239  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
     240  	  if (__GMP_LIKELY (base == 10))
     241  	    mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10,
     242  			      MP_BASES_BIG_BASE_BINVERTED_10,
     243  			      MP_BASES_BIG_BASE_CTZ_10);
     244  	  else
     245  #endif
     246  	    /* FIXME: We could use _pi1 here if we add big_base_binverted and
     247  	       big_base_ctz fields to struct bases.  That would add about 2 KiB
     248  	       to mp_bases.c.
     249  	       FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to
     250  	       mpn_bdiv_q_1 for more machines. */
     251  	    mpn_divexact_1 (t, t, n, big_base);
     252  
     253  	  n -= t[n - 1] == 0;
     254  	  digits_in_base -= chars_per_limb;
     255  	}
     256  
     257        shift *= 2;
     258        /* Strip low zero limbs, but be careful to keep the result divisible by
     259  	 big_base.  */
     260        while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0)
     261  	{
     262  	  t++;
     263  	  n--;
     264  	  shift++;
     265  	}
     266        p = t;
     267  
     268        SET_powers_t (pt[0], p, n, digits_in_base, base, shift);
     269        pt++;
     270      }
     271  
     272    /* Strip any remaining low zero limbs.  */
     273    pt -= n_pows + 1;
     274    for (long pi = n_pows; pi >= 0; pi--)
     275      {
     276        mp_ptr t = pt[pi].p;
     277        mp_size_t shift = pt[pi].shift;
     278        mp_size_t n = pt[pi].n;
     279        int c;
     280        c = t[0] == 0;
     281        t += c;
     282        n -= c;
     283        shift += c;
     284        pt[pi].p = t;
     285        pt[pi].shift = shift;
     286        pt[pi].n = n;
     287      }
     288  }
     289  #endif
     290  
     291  static long
     292  powtab_decide (size_t *exptab, size_t un, int base)
     293  {
     294    int chars_per_limb = mp_bases[base].chars_per_limb;
     295    long n_pows = 0;
     296    for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1)
     297      {
     298        exptab[n_pows] = pn * chars_per_limb;
     299        n_pows++;
     300      }
     301    exptab[n_pows] = chars_per_limb;
     302  
     303  #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
     304    size_t pn = un - 1;
     305    size_t xn = (un + 1) >> 1;
     306    unsigned mcost = 1;
     307    unsigned dcost = 1;
     308    for (long i = n_pows - 2; i >= 0; i--)
     309      {
     310        size_t pow = (pn >> (i + 1)) + 1;
     311  
     312        if (pow & 1)
     313  	dcost += pow;
     314  
     315        if (xn != (pow << i))
     316  	{
     317  	  if (pow > 2 && (pow & 1) == 0)
     318  	    mcost += 2 * pow;
     319  	  else
     320  	    mcost += pow;
     321  	}
     322        else
     323  	{
     324  	  if (pow & 1)
     325  	    mcost += pow;
     326  	}
     327      }
     328  
     329    dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100;
     330  
     331    if (mcost <= dcost)
     332      return n_pows;
     333    else
     334      return -n_pows;
     335  #elif HAVE_mpn_compute_powtab_mul
     336    return n_pows;
     337  #elif HAVE_mpn_compute_powtab_div
     338    return -n_pows;
     339  #else
     340  #error "no powtab function available"
     341  #endif
     342  }
     343  
     344  size_t
     345  mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
     346  {
     347    size_t exptab[GMP_LIMB_BITS];
     348  
     349    long n_pows = powtab_decide (exptab, un, base);
     350  
     351  #if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div
     352    if (n_pows >= 0)
     353      {
     354        mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
     355        return n_pows;
     356      }
     357    else
     358      {
     359        mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
     360        return -n_pows;
     361      }
     362  #elif HAVE_mpn_compute_powtab_mul
     363    ASSERT (n_pows > 0);
     364    mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows);
     365    return n_pows;
     366  #elif HAVE_mpn_compute_powtab_div
     367    ASSERT (n_pows < 0);
     368    mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows);
     369    return -n_pows;
     370  #else
     371  #error "no powtab function available"
     372  #endif
     373  }