1  /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund.
       4  
       5     THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE
       6     INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
       7     IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
       8     FUTURE GNU MP RELEASE.
       9  
      10  Copyright 1991-2017 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  #include "gmp-impl.h"
      39  #include "longlong.h"
      40  
      41  /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
      42     base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
      43  
      44    A) Divide U repeatedly by B, generating a quotient and remainder, until the
      45       quotient becomes zero.  The remainders hold the converted digits.  Digits
      46       come out from right to left.  (Used in mpn_bc_get_str.)
      47  
      48    B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
      49       Then develop digits by multiplying the fraction repeatedly by b.  Digits
      50       come out from left to right.  (Currently not used herein, except for in
      51       code for converting single limbs to individual digits.)
      52  
      53    C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
      54       sqrt(U).  Then divide U by B^s, generating quotient and remainder.
      55       Recursively convert the quotient, then the remainder, using the
      56       precomputed powers.  Digits come out from left to right.  (Used in
      57       mpn_dc_get_str.)
      58  
      59    When using algorithm C, algorithm B might be suitable for basecase code,
      60    since the required b^g power will be readily accessible.
      61  
      62    Optimization ideas:
      63    1. The recursive function of (C) could use less temporary memory.  The powtab
      64       allocation could be trimmed with some computation, and the tmp area could
      65       be reduced, or perhaps eliminated if up is reused for both quotient and
      66       remainder (it is currently used just for remainder).
      67    2. Store the powers of (C) in normalized form, with the normalization count.
      68       Quotients will usually need to be left-shifted before each divide, and
      69       remainders will either need to be left-shifted of right-shifted.
      70    3. In the code for developing digits from a single limb, we could avoid using
      71       a full umul_ppmm except for the first (or first few) digits, provided base
      72       is even.  Subsequent digits can be developed using plain multiplication.
      73       (This saves on register-starved machines (read x86) and on all machines
      74       that generate the upper product half using a separate instruction (alpha,
      75       powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
      76    4. Separate mpn_dc_get_str basecase code from code for small conversions. The
      77       former code will have the exact right power readily available in the
      78       powtab parameter for dividing the current number into a fraction.  Convert
      79       that using algorithm B.
      80    5. Completely avoid division.  Compute the inverses of the powers now in
      81       powtab instead of the actual powers.
      82    6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
      83       about 30% (1-log(5)/log(10)).
      84  
      85    Basic structure of (C):
      86      mpn_get_str:
      87        if POW2_P (n)
      88  	...
      89        else
      90  	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
      91  	  mpn_bx_get_str (str, base, up, un);
      92  	else
      93  	  precompute_power_tables
      94  	  mpn_dc_get_str
      95  
      96      mpn_dc_get_str:
      97  	mpn_tdiv_qr
      98  	if (qn < GET_STR_DC_THRESHOLD)
      99  	  mpn_bc_get_str
     100  	else
     101  	  mpn_dc_get_str
     102  	if (rn < GET_STR_DC_THRESHOLD)
     103  	  mpn_bc_get_str
     104  	else
     105  	  mpn_dc_get_str
     106  
     107  
     108    The reason for the two threshold values is the cost of
     109    precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be
     110    considerably larger than GET_STR_DC_THRESHOLD.  */
     111  
     112  
     113  /* The x86s and m68020 have a quotient and remainder "div" instruction and
     114     gcc recognises an adjacent "/" and "%" can be combined using that.
     115     Elsewhere "/" and "%" are either separate instructions, or separate
     116     libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
     117     A multiply and subtract should be faster than a "%" in those cases.  */
     118  #if HAVE_HOST_CPU_FAMILY_x86            \
     119    || HAVE_HOST_CPU_m68020               \
     120    || HAVE_HOST_CPU_m68030               \
     121    || HAVE_HOST_CPU_m68040               \
     122    || HAVE_HOST_CPU_m68060               \
     123    || HAVE_HOST_CPU_m68360 /* CPU32 */
     124  #define udiv_qrnd_unnorm(q,r,n,d)       \
     125    do {                                  \
     126      mp_limb_t  __q = (n) / (d);         \
     127      mp_limb_t  __r = (n) % (d);         \
     128      (q) = __q;                          \
     129      (r) = __r;                          \
     130    } while (0)
     131  #else
     132  #define udiv_qrnd_unnorm(q,r,n,d)       \
     133    do {                                  \
     134      mp_limb_t  __q = (n) / (d);         \
     135      mp_limb_t  __r = (n) - __q*(d);     \
     136      (q) = __q;                          \
     137      (r) = __r;                          \
     138    } while (0)
     139  #endif
     140  
     141  
     142  /* Convert {up,un} to a string in base base, and put the result in str.
     143     Generate len characters, possibly padding with zeros to the left.  If len is
     144     zero, generate as many characters as required.  Return a pointer immediately
     145     after the last digit of the result string.  Complexity is O(un^2); intended
     146     for small conversions.  */
     147  static unsigned char *
     148  mpn_bc_get_str (unsigned char *str, size_t len,
     149  		mp_ptr up, mp_size_t un, int base)
     150  {
     151    mp_limb_t rl, ul;
     152    unsigned char *s;
     153    size_t l;
     154    /* Allocate memory for largest possible string, given that we only get here
     155       for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
     156       base is 3.  7/11 is an approximation to 1/log2(3).  */
     157  #if TUNE_PROGRAM_BUILD
     158  #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
     159  #else
     160  #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
     161  #endif
     162    unsigned char buf[BUF_ALLOC];
     163  #if TUNE_PROGRAM_BUILD
     164    mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
     165  #else
     166    mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
     167  #endif
     168  
     169    if (base == 10)
     170      {
     171        /* Special case code for base==10 so that the compiler has a chance to
     172  	 optimize things.  */
     173  
     174        MPN_COPY (rp + 1, up, un);
     175  
     176        s = buf + BUF_ALLOC;
     177        while (un > 1)
     178  	{
     179  	  int i;
     180  	  mp_limb_t frac, digit;
     181  	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
     182  					 MP_BASES_BIG_BASE_10,
     183  					 MP_BASES_BIG_BASE_INVERTED_10,
     184  					 MP_BASES_NORMALIZATION_STEPS_10);
     185  	  un -= rp[un] == 0;
     186  	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
     187  	  s -= MP_BASES_CHARS_PER_LIMB_10;
     188  #if HAVE_HOST_CPU_FAMILY_x86
     189  	  /* The code below turns out to be a bit slower for x86 using gcc.
     190  	     Use plain code.  */
     191  	  i = MP_BASES_CHARS_PER_LIMB_10;
     192  	  do
     193  	    {
     194  	      umul_ppmm (digit, frac, frac, 10);
     195  	      *s++ = digit;
     196  	    }
     197  	  while (--i);
     198  #else
     199  	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
     200  	     After a few umul_ppmm, we will have accumulated enough low zeros
     201  	     to use a plain multiply.  */
     202  	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
     203  	    {
     204  	      umul_ppmm (digit, frac, frac, 10);
     205  	      *s++ = digit;
     206  	    }
     207  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
     208  	    {
     209  	      umul_ppmm (digit, frac, frac, 10);
     210  	      *s++ = digit;
     211  	    }
     212  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
     213  	    {
     214  	      umul_ppmm (digit, frac, frac, 10);
     215  	      *s++ = digit;
     216  	    }
     217  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
     218  	    {
     219  	      umul_ppmm (digit, frac, frac, 10);
     220  	      *s++ = digit;
     221  	    }
     222  	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
     223  					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
     224  					     : 0));
     225  	  frac = (frac + 0xf) >> 4;
     226  	  do
     227  	    {
     228  	      frac *= 10;
     229  	      digit = frac >> (GMP_LIMB_BITS - 4);
     230  	      *s++ = digit;
     231  	      frac &= (~(mp_limb_t) 0) >> 4;
     232  	    }
     233  	  while (--i);
     234  #endif
     235  	  s -= MP_BASES_CHARS_PER_LIMB_10;
     236  	}
     237  
     238        ul = rp[1];
     239        while (ul != 0)
     240  	{
     241  	  udiv_qrnd_unnorm (ul, rl, ul, 10);
     242  	  *--s = rl;
     243  	}
     244      }
     245    else /* not base 10 */
     246      {
     247        unsigned chars_per_limb;
     248        mp_limb_t big_base, big_base_inverted;
     249        unsigned normalization_steps;
     250  
     251        chars_per_limb = mp_bases[base].chars_per_limb;
     252        big_base = mp_bases[base].big_base;
     253        big_base_inverted = mp_bases[base].big_base_inverted;
     254        count_leading_zeros (normalization_steps, big_base);
     255  
     256        MPN_COPY (rp + 1, up, un);
     257  
     258        s = buf + BUF_ALLOC;
     259        while (un > 1)
     260  	{
     261  	  int i;
     262  	  mp_limb_t frac;
     263  	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
     264  					 big_base, big_base_inverted,
     265  					 normalization_steps);
     266  	  un -= rp[un] == 0;
     267  	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
     268  	  s -= chars_per_limb;
     269  	  i = chars_per_limb;
     270  	  do
     271  	    {
     272  	      mp_limb_t digit;
     273  	      umul_ppmm (digit, frac, frac, base);
     274  	      *s++ = digit;
     275  	    }
     276  	  while (--i);
     277  	  s -= chars_per_limb;
     278  	}
     279  
     280        ul = rp[1];
     281        while (ul != 0)
     282  	{
     283  	  udiv_qrnd_unnorm (ul, rl, ul, base);
     284  	  *--s = rl;
     285  	}
     286      }
     287  
     288    l = buf + BUF_ALLOC - s;
     289    while (l < len)
     290      {
     291        *str++ = 0;
     292        len--;
     293      }
     294    while (l != 0)
     295      {
     296        *str++ = *s++;
     297        l--;
     298      }
     299    return str;
     300  }
     301  
     302  
     303  /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
     304     the string in STR.  Generate LEN characters, possibly padding with zeros to
     305     the left.  If LEN is zero, generate as many characters as required.
     306     Return a pointer immediately after the last digit of the result string.
     307     This uses divide-and-conquer and is intended for large conversions.  */
     308  static unsigned char *
     309  mpn_dc_get_str (unsigned char *str, size_t len,
     310  		mp_ptr up, mp_size_t un,
     311  		const powers_t *powtab, mp_ptr tmp)
     312  {
     313    if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
     314      {
     315        if (un != 0)
     316  	str = mpn_bc_get_str (str, len, up, un, powtab->base);
     317        else
     318  	{
     319  	  while (len != 0)
     320  	    {
     321  	      *str++ = 0;
     322  	      len--;
     323  	    }
     324  	}
     325      }
     326    else
     327      {
     328        mp_ptr pwp, qp, rp;
     329        mp_size_t pwn, qn;
     330        mp_size_t sn;
     331  
     332        pwp = powtab->p;
     333        pwn = powtab->n;
     334        sn = powtab->shift;
     335  
     336        if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
     337  	{
     338  	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
     339  	}
     340        else
     341  	{
     342  	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
     343  	  rp = up;		/* pwn limbs for rp; overwrite up area */
     344  
     345  	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
     346  	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
     347  
     348  	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
     349  
     350  	  if (len != 0)
     351  	    len = len - powtab->digits_in_base;
     352  
     353  	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
     354  	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
     355  	}
     356      }
     357    return str;
     358  }
     359  
     360  /* There are no leading zeros on the digits generated at str, but that's not
     361     currently a documented feature.  The current mpz_out_str and mpz_get_str
     362     rely on it.  */
     363  
     364  size_t
     365  mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
     366  {
     367    mp_ptr powtab_mem;
     368    powers_t powtab[GMP_LIMB_BITS];
     369    int pi;
     370    size_t out_len;
     371    mp_ptr tmp;
     372    TMP_DECL;
     373  
     374    /* Special case zero, as the code below doesn't handle it.  */
     375    if (un == 0)
     376      {
     377        str[0] = 0;
     378        return 1;
     379      }
     380  
     381    if (POW2_P (base))
     382      {
     383        /* The base is a power of 2.  Convert from most significant end.  */
     384        mp_limb_t n1, n0;
     385        int bits_per_digit = mp_bases[base].big_base;
     386        int cnt;
     387        int bit_pos;
     388        mp_size_t i;
     389        unsigned char *s = str;
     390        mp_bitcnt_t bits;
     391  
     392        n1 = up[un - 1];
     393        count_leading_zeros (cnt, n1);
     394  
     395        /* BIT_POS should be R when input ends in least significant nibble,
     396  	 R + bits_per_digit * n when input ends in nth least significant
     397  	 nibble. */
     398  
     399        bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
     400        cnt = bits % bits_per_digit;
     401        if (cnt != 0)
     402  	bits += bits_per_digit - cnt;
     403        bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
     404  
     405        /* Fast loop for bit output.  */
     406        i = un - 1;
     407        for (;;)
     408  	{
     409  	  bit_pos -= bits_per_digit;
     410  	  while (bit_pos >= 0)
     411  	    {
     412  	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
     413  	      bit_pos -= bits_per_digit;
     414  	    }
     415  	  i--;
     416  	  if (i < 0)
     417  	    break;
     418  	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
     419  	  n1 = up[i];
     420  	  bit_pos += GMP_NUMB_BITS;
     421  	  *s++ = n0 | (n1 >> bit_pos);
     422  	}
     423  
     424        return s - str;
     425      }
     426  
     427    /* General case.  The base is not a power of 2.  */
     428  
     429    if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
     430      return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str;
     431  
     432    TMP_MARK;
     433  
     434    /* Allocate one large block for the powers of big_base.  */
     435    powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
     436  
     437    /* Compute a table of powers, were the largest power is >= sqrt(U).  */
     438    size_t ndig;
     439    mp_size_t xn;
     440    DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
     441    xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
     442  
     443    pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base);
     444  
     445    /* Using our precomputed powers, now in powtab[], convert our number.  */
     446    tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
     447    out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
     448    TMP_FREE;
     449  
     450    return out_len;
     451  }