1  /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
       2     write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
       3     qxn is non-zero, generate that many fraction limbs and append them after the
       4     other quotient limbs, and update the remainder accordingly.  The input
       5     operands are unaffected.
       6  
       7     Preconditions:
       8     1. The most significant limb of the divisor must be non-zero.
       9     2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
      10  
      11     The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
      12     complexity of multiplication.
      13  
      14  Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
      15  
      16  This file is part of the GNU MP Library.
      17  
      18  The GNU MP Library is free software; you can redistribute it and/or modify
      19  it under the terms of either:
      20  
      21    * the GNU Lesser General Public License as published by the Free
      22      Software Foundation; either version 3 of the License, or (at your
      23      option) any later version.
      24  
      25  or
      26  
      27    * the GNU General Public License as published by the Free Software
      28      Foundation; either version 2 of the License, or (at your option) any
      29      later version.
      30  
      31  or both in parallel, as here.
      32  
      33  The GNU MP Library is distributed in the hope that it will be useful, but
      34  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      35  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      36  for more details.
      37  
      38  You should have received copies of the GNU General Public License and the
      39  GNU Lesser General Public License along with the GNU MP Library.  If not,
      40  see https://www.gnu.org/licenses/.  */
      41  
      42  #include "gmp-impl.h"
      43  #include "longlong.h"
      44  
      45  
      46  void
      47  mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
      48  	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
      49  {
      50    ASSERT_ALWAYS (qxn == 0);
      51  
      52    ASSERT (nn >= 0);
      53    ASSERT (dn >= 0);
      54    ASSERT (dn == 0 || dp[dn - 1] != 0);
      55    ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
      56    ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
      57  
      58    switch (dn)
      59      {
      60      case 0:
      61        DIVIDE_BY_ZERO;
      62  
      63      case 1:
      64        {
      65  	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
      66  	return;
      67        }
      68  
      69      case 2:
      70        {
      71  	mp_ptr n2p;
      72  	mp_limb_t qhl, cy;
      73  	TMP_DECL;
      74  	TMP_MARK;
      75  	if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
      76  	  {
      77  	    int cnt;
      78  	    mp_limb_t d2p[2];
      79  	    count_leading_zeros (cnt, dp[1]);
      80  	    cnt -= GMP_NAIL_BITS;
      81  	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
      82  	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
      83  	    n2p = TMP_ALLOC_LIMBS (nn + 1);
      84  	    cy = mpn_lshift (n2p, np, nn, cnt);
      85  	    n2p[nn] = cy;
      86  	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
      87  	    if (cy == 0)
      88  	      qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
      89  	    rp[0] = (n2p[0] >> cnt)
      90  	      | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
      91  	    rp[1] = (n2p[1] >> cnt);
      92  	  }
      93  	else
      94  	  {
      95  	    n2p = TMP_ALLOC_LIMBS (nn);
      96  	    MPN_COPY (n2p, np, nn);
      97  	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
      98  	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
      99  	    rp[0] = n2p[0];
     100  	    rp[1] = n2p[1];
     101  	  }
     102  	TMP_FREE;
     103  	return;
     104        }
     105  
     106      default:
     107        {
     108  	int adjust;
     109  	gmp_pi1_t dinv;
     110  	TMP_DECL;
     111  	TMP_MARK;
     112  	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
     113  	if (nn + adjust >= 2 * dn)
     114  	  {
     115  	    mp_ptr n2p, d2p;
     116  	    mp_limb_t cy;
     117  	    int cnt;
     118  
     119  	    qp[nn - dn] = 0;			  /* zero high quotient limb */
     120  	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
     121  	      {
     122  		count_leading_zeros (cnt, dp[dn - 1]);
     123  		cnt -= GMP_NAIL_BITS;
     124  		d2p = TMP_ALLOC_LIMBS (dn);
     125  		mpn_lshift (d2p, dp, dn, cnt);
     126  		n2p = TMP_ALLOC_LIMBS (nn + 1);
     127  		cy = mpn_lshift (n2p, np, nn, cnt);
     128  		n2p[nn] = cy;
     129  		nn += adjust;
     130  	      }
     131  	    else
     132  	      {
     133  		cnt = 0;
     134  		d2p = (mp_ptr) dp;
     135  		n2p = TMP_ALLOC_LIMBS (nn + 1);
     136  		MPN_COPY (n2p, np, nn);
     137  		n2p[nn] = 0;
     138  		nn += adjust;
     139  	      }
     140  
     141  	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
     142  	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
     143  	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
     144  	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
     145  		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
     146  		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
     147  		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
     148  	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
     149  	    else
     150  	      {
     151  		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
     152  		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
     153  		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
     154  		n2p = rp;
     155  	      }
     156  
     157  	    if (cnt != 0)
     158  	      mpn_rshift (rp, n2p, dn, cnt);
     159  	    else
     160  	      MPN_COPY (rp, n2p, dn);
     161  	    TMP_FREE;
     162  	    return;
     163  	  }
     164  
     165  	/* When we come here, the numerator/partial remainder is less
     166  	   than twice the size of the denominator.  */
     167  
     168  	  {
     169  	    /* Problem:
     170  
     171  	       Divide a numerator N with nn limbs by a denominator D with dn
     172  	       limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
     173  	       compared to dn, conventional division algorithms perform poorly.
     174  	       We want an algorithm that has an expected running time that is
     175  	       dependent only on qn.
     176  
     177  	       Algorithm (very informally stated):
     178  
     179  	       1) Divide the 2 x qn most significant limbs from the numerator
     180  		  by the qn most significant limbs from the denominator.  Call
     181  		  the result qest.  This is either the correct quotient, but
     182  		  might be 1 or 2 too large.  Compute the remainder from the
     183  		  division.  (This step is implemented by an mpn_divrem call.)
     184  
     185  	       2) Is the most significant limb from the remainder < p, where p
     186  		  is the product of the most significant limb from the quotient
     187  		  and the next(d)?  (Next(d) denotes the next ignored limb from
     188  		  the denominator.)  If it is, decrement qest, and adjust the
     189  		  remainder accordingly.
     190  
     191  	       3) Is the remainder >= qest?  If it is, qest is the desired
     192  		  quotient.  The algorithm terminates.
     193  
     194  	       4) Subtract qest x next(d) from the remainder.  If there is
     195  		  borrow out, decrement qest, and adjust the remainder
     196  		  accordingly.
     197  
     198  	       5) Skip one word from the denominator (i.e., let next(d) denote
     199  		  the next less significant limb.  */
     200  
     201  	    mp_size_t qn;
     202  	    mp_ptr n2p, d2p;
     203  	    mp_ptr tp;
     204  	    mp_limb_t cy;
     205  	    mp_size_t in, rn;
     206  	    mp_limb_t quotient_too_large;
     207  	    unsigned int cnt;
     208  
     209  	    qn = nn - dn;
     210  	    qp[qn] = 0;				/* zero high quotient limb */
     211  	    qn += adjust;			/* qn cannot become bigger */
     212  
     213  	    if (qn == 0)
     214  	      {
     215  		MPN_COPY (rp, np, dn);
     216  		TMP_FREE;
     217  		return;
     218  	      }
     219  
     220  	    in = dn - qn;		/* (at least partially) ignored # of limbs in ops */
     221  	    /* Normalize denominator by shifting it to the left such that its
     222  	       most significant bit is set.  Then shift the numerator the same
     223  	       amount, to mathematically preserve quotient.  */
     224  	    if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
     225  	      {
     226  		count_leading_zeros (cnt, dp[dn - 1]);
     227  		cnt -= GMP_NAIL_BITS;
     228  
     229  		d2p = TMP_ALLOC_LIMBS (qn);
     230  		mpn_lshift (d2p, dp + in, qn, cnt);
     231  		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
     232  
     233  		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
     234  		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
     235  		if (adjust)
     236  		  {
     237  		    n2p[2 * qn] = cy;
     238  		    n2p++;
     239  		  }
     240  		else
     241  		  {
     242  		    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
     243  		  }
     244  	      }
     245  	    else
     246  	      {
     247  		cnt = 0;
     248  		d2p = (mp_ptr) dp + in;
     249  
     250  		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
     251  		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
     252  		if (adjust)
     253  		  {
     254  		    n2p[2 * qn] = 0;
     255  		    n2p++;
     256  		  }
     257  	      }
     258  
     259  	    /* Get an approximate quotient using the extracted operands.  */
     260  	    if (qn == 1)
     261  	      {
     262  		mp_limb_t q0, r0;
     263  		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
     264  		n2p[0] = r0 >> GMP_NAIL_BITS;
     265  		qp[0] = q0;
     266  	      }
     267  	    else if (qn == 2)
     268  	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
     269  	    else
     270  	      {
     271  		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
     272  		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
     273  		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
     274  		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
     275  		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
     276  		else
     277  		  {
     278  		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
     279  		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
     280  		    mp_ptr r2p = rp;
     281  		    if (np == r2p)	/* If N and R share space, put ... */
     282  		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
     283  		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
     284  		    MPN_COPY (n2p, r2p, qn);
     285  		  }
     286  	      }
     287  
     288  	    rn = qn;
     289  	    /* Multiply the first ignored divisor limb by the most significant
     290  	       quotient limb.  If that product is > the partial remainder's
     291  	       most significant limb, we know the quotient is too large.  This
     292  	       test quickly catches most cases where the quotient is too large;
     293  	       it catches all cases where the quotient is 2 too large.  */
     294  	    {
     295  	      mp_limb_t dl, x;
     296  	      mp_limb_t h, dummy;
     297  
     298  	      if (in - 2 < 0)
     299  		dl = 0;
     300  	      else
     301  		dl = dp[in - 2];
     302  
     303  #if GMP_NAIL_BITS == 0
     304  	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
     305  #else
     306  	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
     307  	      if (cnt != 0)
     308  		x |= dl >> (GMP_NUMB_BITS - cnt);
     309  #endif
     310  	      umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
     311  
     312  	      if (n2p[qn - 1] < h)
     313  		{
     314  		  mp_limb_t cy;
     315  
     316  		  mpn_decr_u (qp, (mp_limb_t) 1);
     317  		  cy = mpn_add_n (n2p, n2p, d2p, qn);
     318  		  if (cy)
     319  		    {
     320  		      /* The partial remainder is safely large.  */
     321  		      n2p[qn] = cy;
     322  		      ++rn;
     323  		    }
     324  		}
     325  	    }
     326  
     327  	    quotient_too_large = 0;
     328  	    if (cnt != 0)
     329  	      {
     330  		mp_limb_t cy1, cy2;
     331  
     332  		/* Append partially used numerator limb to partial remainder.  */
     333  		cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
     334  		n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
     335  
     336  		/* Update partial remainder with partially used divisor limb.  */
     337  		cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
     338  		if (qn != rn)
     339  		  {
     340  		    ASSERT_ALWAYS (n2p[qn] >= cy2);
     341  		    n2p[qn] -= cy2;
     342  		  }
     343  		else
     344  		  {
     345  		    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
     346  
     347  		    quotient_too_large = (cy1 < cy2);
     348  		    ++rn;
     349  		  }
     350  		--in;
     351  	      }
     352  	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
     353  
     354  	    tp = TMP_ALLOC_LIMBS (dn);
     355  
     356  	    if (in < qn)
     357  	      {
     358  		if (in == 0)
     359  		  {
     360  		    MPN_COPY (rp, n2p, rn);
     361  		    ASSERT_ALWAYS (rn == dn);
     362  		    goto foo;
     363  		  }
     364  		mpn_mul (tp, qp, qn, dp, in);
     365  	      }
     366  	    else
     367  	      mpn_mul (tp, dp, in, qp, qn);
     368  
     369  	    cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
     370  	    MPN_COPY (rp + in, n2p, dn - in);
     371  	    quotient_too_large |= cy;
     372  	    cy = mpn_sub_n (rp, np, tp, in);
     373  	    cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
     374  	    quotient_too_large |= cy;
     375  	  foo:
     376  	    if (quotient_too_large)
     377  	      {
     378  		mpn_decr_u (qp, (mp_limb_t) 1);
     379  		mpn_add_n (rp, rp, dp, dn);
     380  	      }
     381  	  }
     382  	TMP_FREE;
     383  	return;
     384        }
     385      }
     386  }