1  /* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
       2     Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
       3     Return the single-limb remainder.
       4     There are no constraints on the value of the divisor.
       5  
       6  Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012, 2020
       7  Free Software Foundation, Inc.
       8  
       9  This file is part of the GNU MP Library.
      10  
      11  The GNU MP Library is free software; you can redistribute it and/or modify
      12  it under the terms of either:
      13  
      14    * the GNU Lesser General Public License as published by the Free
      15      Software Foundation; either version 3 of the License, or (at your
      16      option) any later version.
      17  
      18  or
      19  
      20    * the GNU General Public License as published by the Free Software
      21      Foundation; either version 2 of the License, or (at your option) any
      22      later version.
      23  
      24  or both in parallel, as here.
      25  
      26  The GNU MP Library is distributed in the hope that it will be useful, but
      27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      29  for more details.
      30  
      31  You should have received copies of the GNU General Public License and the
      32  GNU Lesser General Public License along with the GNU MP Library.  If not,
      33  see https://www.gnu.org/licenses/.  */
      34  
      35  #include "gmp-impl.h"
      36  #include "longlong.h"
      37  
      38  
      39  /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
      40     meaning the quotient size where that should happen, the quotient size
      41     being how many udiv divisions will be done.
      42  
      43     The default is to use preinv always, CPUs where this doesn't suit have
      44     tuned thresholds.  Note in particular that preinv should certainly be
      45     used if that's the only division available (USE_PREINV_ALWAYS).  */
      46  
      47  #ifndef MOD_1_NORM_THRESHOLD
      48  #define MOD_1_NORM_THRESHOLD  0
      49  #endif
      50  
      51  #ifndef MOD_1_UNNORM_THRESHOLD
      52  #define MOD_1_UNNORM_THRESHOLD  0
      53  #endif
      54  
      55  #ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
      56  #define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
      57  #endif
      58  
      59  #ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
      60  #define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
      61  #endif
      62  
      63  #ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
      64  #define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
      65  #endif
      66  
      67  #ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
      68  #define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
      69  #endif
      70  
      71  #if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
      72  /* Duplicates declarations in tune/speed.h */
      73  mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
      74  mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
      75  
      76  void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
      77  void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
      78  
      79  #undef mpn_mod_1_1p
      80  #define mpn_mod_1_1p(ap, n, b, pre)			     \
      81    (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
      82     : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
      83        : __gmpn_mod_1_1p (ap, n, b, pre)))
      84  
      85  #undef mpn_mod_1_1p_cps
      86  #define mpn_mod_1_1p_cps(pre, b)				\
      87    (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
      88     : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
      89        : __gmpn_mod_1_1p_cps (pre, b)))
      90  #endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
      91  
      92  
      93  /* The comments in mpn/generic/divrem_1.c apply here too.
      94  
      95     As noted in the algorithms section of the manual, the shifts in the loop
      96     for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
      97     by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
      98     skip a division where (a*2^n)%(d*2^n) can't then there's the same number
      99     of divide steps, though how often that happens depends on the assumed
     100     distributions of dividend and divisor.  In any case this idea is left to
     101     CPU specific implementations to consider.  */
     102  
     103  static mp_limb_t
     104  mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
     105  {
     106    mp_size_t  i;
     107    mp_limb_t  n1, n0, r;
     108    mp_limb_t  dummy;
     109    int cnt;
     110  
     111    ASSERT (un > 0);
     112    ASSERT (d != 0);
     113  
     114    /* Skip a division if high < divisor.  Having the test here before
     115       normalizing will still skip as often as possible.  */
     116    r = up[un - 1];
     117    if (r < d)
     118      {
     119        if (--un == 0)
     120  	return r;
     121      }
     122    else
     123      r = 0;
     124  
     125    d <<= GMP_NAIL_BITS;
     126  
     127    /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
     128       code above. */
     129    if (! UDIV_NEEDS_NORMALIZATION
     130        && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
     131      {
     132        for (i = un - 1; i >= 0; i--)
     133  	{
     134  	  n0 = up[i] << GMP_NAIL_BITS;
     135  	  udiv_qrnnd (dummy, r, r, n0, d);
     136  	  r >>= GMP_NAIL_BITS;
     137  	}
     138        return r;
     139      }
     140  
     141    count_leading_zeros (cnt, d);
     142    d <<= cnt;
     143  
     144    n1 = up[un - 1] << GMP_NAIL_BITS;
     145    r = (r << cnt) | (n1 >> (GMP_LIMB_BITS - cnt));
     146  
     147    if (UDIV_NEEDS_NORMALIZATION
     148        && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
     149      {
     150        mp_limb_t nshift;
     151        for (i = un - 2; i >= 0; i--)
     152  	{
     153  	  n0 = up[i] << GMP_NAIL_BITS;
     154  	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
     155  	  udiv_qrnnd (dummy, r, r, nshift, d);
     156  	  r >>= GMP_NAIL_BITS;
     157  	  n1 = n0;
     158  	}
     159        udiv_qrnnd (dummy, r, r, n1 << cnt, d);
     160        r >>= GMP_NAIL_BITS;
     161        return r >> cnt;
     162      }
     163    else
     164      {
     165        mp_limb_t inv, nshift;
     166        invert_limb (inv, d);
     167  
     168        for (i = un - 2; i >= 0; i--)
     169  	{
     170  	  n0 = up[i] << GMP_NAIL_BITS;
     171  	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
     172  	  udiv_rnnd_preinv (r, r, nshift, d, inv);
     173  	  r >>= GMP_NAIL_BITS;
     174  	  n1 = n0;
     175  	}
     176        udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
     177        r >>= GMP_NAIL_BITS;
     178        return r >> cnt;
     179      }
     180  }
     181  
     182  static mp_limb_t
     183  mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
     184  {
     185    mp_size_t  i;
     186    mp_limb_t  n0, r;
     187    mp_limb_t  dummy;
     188  
     189    ASSERT (un > 0);
     190  
     191    d <<= GMP_NAIL_BITS;
     192  
     193    ASSERT (d & GMP_LIMB_HIGHBIT);
     194  
     195    /* High limb is initial remainder, possibly with one subtract of
     196       d to get r<d.  */
     197    r = up[un - 1] << GMP_NAIL_BITS;
     198    if (r >= d)
     199      r -= d;
     200    r >>= GMP_NAIL_BITS;
     201    un--;
     202    if (un == 0)
     203      return r;
     204  
     205    if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
     206      {
     207        for (i = un - 1; i >= 0; i--)
     208  	{
     209  	  n0 = up[i] << GMP_NAIL_BITS;
     210  	  udiv_qrnnd (dummy, r, r, n0, d);
     211  	  r >>= GMP_NAIL_BITS;
     212  	}
     213        return r;
     214      }
     215    else
     216      {
     217        mp_limb_t  inv;
     218        invert_limb (inv, d);
     219        for (i = un - 1; i >= 0; i--)
     220  	{
     221  	  n0 = up[i] << GMP_NAIL_BITS;
     222  	  udiv_rnnd_preinv (r, r, n0, d, inv);
     223  	  r >>= GMP_NAIL_BITS;
     224  	}
     225        return r;
     226      }
     227  }
     228  
     229  mp_limb_t
     230  mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
     231  {
     232    ASSERT (n >= 0);
     233    ASSERT (b != 0);
     234  
     235    /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
     236       required by mpz/fdiv_r_ui.c and possibly other places.  */
     237    if (n == 0)
     238      return 0;
     239  
     240    if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
     241      {
     242        if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
     243  	{
     244  	  return mpn_mod_1_norm (ap, n, b);
     245  	}
     246        else
     247  	{
     248  	  mp_limb_t pre[4];
     249  	  mpn_mod_1_1p_cps (pre, b);
     250  	  return mpn_mod_1_1p (ap, n, b, pre);
     251  	}
     252      }
     253    else
     254      {
     255        if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
     256  	{
     257  	  return mpn_mod_1_unnorm (ap, n, b);
     258  	}
     259        else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
     260  	{
     261  	  mp_limb_t pre[4];
     262  	  mpn_mod_1_1p_cps (pre, b);
     263  	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
     264  	}
     265        else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
     266  	{
     267  	  mp_limb_t pre[5];
     268  	  mpn_mod_1s_2p_cps (pre, b);
     269  	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
     270  	}
     271        else
     272  	{
     273  	  mp_limb_t pre[7];
     274  	  mpn_mod_1s_4p_cps (pre, b);
     275  	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
     276  	}
     277      }
     278  }