gmp-6.3.0/mpn/generic/divrem_1.c
/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* DIVREM_1_NORM_THRESHOLD and DIVREM_1_UNNORM_THRESHOLD are the quotient
   sizes (i.e. the number of udiv divisions that will be done) above which
   udiv_qrnnd_preinv should be used rather than udiv_qrnnd.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds (an illustrative override is sketched after the defaults
   below).  Note in particular that preinv should certainly be used if
   that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
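
/* Illustrative only (not part of this file): a CPU-specific gmp-mparam.h
   would normally supply tuned values ahead of the defaults above, along
   the lines of (hypothetical numbers)

       #define DIVREM_1_NORM_THRESHOLD      3
       #define DIVREM_1_UNNORM_THRESHOLD    5

   and a threshold of MP_SIZE_T_MAX disables mul-by-inverse entirely.  */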


/* If the cpu only has multiply-by-inverse division (e.g. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS", which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and of course with different
   thresholds.  And in fact if inversion is never viable, then that simple
   non-shifting division is all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  It's also possible that mul-by-inverse is better
   only for normalized divisors, since that case doesn't need variable bit
   shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to
   CPU-specific implementations to consider, especially since EXTRACT isn't
   on the dependent chain.  */
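
/* A minimal usage sketch (illustrative only, guarded out so it doesn't
   affect the build): mpn_divrem_1 writes un+qxn quotient limbs at qp, the
   low qxn of them being "fraction" limbs developed past the radix point,
   and returns the remainder.  Names and values below are hypothetical.  */
#if 0
static mp_limb_t
divrem_1_example (void)
{
  mp_limb_t up[2] = { 123, 456 };   /* dividend, least significant limb first */
  mp_limb_t qp[2];                  /* room for un + qxn = 2 + 0 quotient limbs */
  mp_limb_t r;

  r = mpn_divrem_1 (qp, 0, up, 2, CNST_LIMB(789));
  /* {qp,2} now holds the quotient and r the remainder, 0 <= r < 789.  */
  return r;
}
#endif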

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
              mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  n;
  mp_size_t  i;
  mp_limb_t  n1, n0;
  mp_limb_t  r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  /* Total quotient size: un integer limbs plus qxn low fraction limbs.  */
  n = un + qxn;
  if (n == 0)
    return 0;

  /* Work with d (and below, the dividend limbs) shifted up past the nail
     bits; each remainder is shifted back down.  A no-op in non-nail
     builds.  */
  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
        {
          /* High quotient limb is 0 or 1 (d has its high bit set, so
             up[un-1] < 2*d), skip a divide step.  */
          mp_limb_t q;
          r = up[un - 1] << GMP_NAIL_BITS;
          q = (r >= d);
          *qp-- = q;
          r -= (d & -q);        /* subtract d only when q is 1, branch-free */
          r >>= GMP_NAIL_BITS;
          n--;
          un--;
        }

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
        {
        plain:
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (*qp, r, r, n0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          /* Develop the qxn low fraction limbs from the remainder.  */
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
      else
        {
          /* Multiply-by-inverse, divisor already normalized. */
          mp_limb_t dinv;
          invert_limb (dinv, d);

          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
         before normalizing will still skip as often as possible.  */
      if (un != 0)
        {
          n1 = up[un - 1] << GMP_NAIL_BITS;
          if (n1 < d)
            {
              r = n1 >> GMP_NAIL_BITS;
              *qp-- = 0;
              n--;
              if (n == 0)
                return r;
              un--;
            }
        }

      if (! UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        goto plain;

      /* Normalize the divisor; the final remainder is shifted back down by
         the same amount before returning.  */
      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;

      if (UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        {
          mp_limb_t nshift;
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - cnt));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
                  udiv_qrnnd (*qp, r, r, nshift, d);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd (*qp, r, r, n1 << cnt, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> cnt;
        }
      else
        {
          mp_limb_t  dinv, nshift;
          invert_limb (dinv, d);
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - cnt));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
                  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> cnt;
        }
    }
}