1  /* mpn_mu_div_q.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
       4  
       5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
       8  
       9  Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  /*
      39     The idea of the algorithm used herein is to compute a smaller inverted value
      40     than used in the standard Barrett algorithm, and thus save time in the
      41     Newton iterations, and pay just a small price when using the inverted value
      42     for developing quotient bits.  This algorithm was presented at ICMS 2006.
      43  */
      44  
      45  /*
      46    Things to work on:
      47  
      48    1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
      49       probably close to optimal, except when mpn_mu_divappr_q fails.
      50  
      51    2. We used to fall back to mpn_mu_div_qr when we detect a possible
      52       mpn_mu_divappr_q rounding problem, now we multiply and compare.
      53       Unfortunately, since mpn_mu_divappr_q does not return the partial
      54       remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
      55       solve that.
      56  
      57    3. The allocations done here should be made from the scratch area, which
      58       then would need to be amended.
      59  */
      60  
      61  #include <stdlib.h>		/* for NULL */
      62  #include "gmp-impl.h"
      63  
      64  
      65  mp_limb_t
      66  mpn_mu_div_q (mp_ptr qp,
      67  	      mp_srcptr np, mp_size_t nn,
      68  	      mp_srcptr dp, mp_size_t dn,
      69  	      mp_ptr scratch)
      70  {
      71    mp_ptr tp, rp;
      72    mp_size_t qn;
      73    mp_limb_t cy, qh;
      74    TMP_DECL;
      75  
      76    TMP_MARK;
      77  
      78    qn = nn - dn;
      79  
      80    tp = TMP_BALLOC_LIMBS (qn + 1);
      81  
      82    if (qn >= dn)			/* nn >= 2*dn + 1 */
      83      {
      84         /* |_______________________|   dividend
      85  			 |________|   divisor  */
      86  
      87        rp = TMP_BALLOC_LIMBS (nn + 1);
      88        MPN_COPY (rp + 1, np, nn);
      89        rp[0] = 0;
      90  
      91        qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
      92        if (qh != 0)
      93  	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
      94  
      95        cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
      96  
      97        if (UNLIKELY (cy != 0))
      98  	{
      99  	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
     100  	     canonically reduced, replace the returned value of B^(qn-dn)+eps
     101  	     by the largest possible value.  */
     102  	  mp_size_t i;
     103  	  for (i = 0; i < qn + 1; i++)
     104  	    tp[i] = GMP_NUMB_MAX;
     105  	}
     106  
     107        /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
     108  	 smaller than the max error, we cannot trust the quotient.  */
     109        if (tp[0] > 4)
     110  	{
     111  	  MPN_COPY (qp, tp + 1, qn);
     112  	}
     113        else
     114  	{
     115  	  mp_limb_t cy;
     116  	  mp_ptr pp;
     117  
     118  	  pp = rp;
     119  	  mpn_mul (pp, tp + 1, qn, dp, dn);
     120  
     121  	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
     122  
     123  	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
     124  	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
     125  	  else /* Same as above */
     126  	    MPN_COPY (qp, tp + 1, qn);
     127  	}
     128      }
     129    else
     130      {
     131         /* |_______________________|   dividend
     132  		 |________________|   divisor  */
     133  
     134        /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
     135  	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
     136  	 the most significant dn-1 limbs will actually be read, but it is not
     137  	 pretty.  */
     138  
     139        qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
     140  			     dp + dn - (qn + 1), qn + 1, scratch);
     141  
     142        /* The max error of mpn_mu_divappr_q is +4, but we get an additional
     143           error from the divisor truncation.  */
     144        if (tp[0] > 6)
     145  	{
     146  	  MPN_COPY (qp, tp + 1, qn);
     147  	}
     148        else
     149  	{
     150  	  mp_limb_t cy;
     151  
     152  	  /* FIXME: a shorter product should be enough; we may use already
     153  	     allocated space... */
     154  	  rp = TMP_BALLOC_LIMBS (nn);
     155  	  mpn_mul (rp, dp, dn, tp + 1, qn);
     156  
     157  	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
     158  
     159  	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
     160  	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
     161  	  else /* Same as above */
     162  	    MPN_COPY (qp, tp + 1, qn);
     163  	}
     164      }
     165  
     166    TMP_FREE;
     167    return qh;
     168  }
     169  
     170  mp_size_t
     171  mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
     172  {
     173    mp_size_t qn;
     174  
     175    qn = nn - dn;
     176    if (qn >= dn)
     177      {
     178        return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
     179      }
     180    else
     181      {
     182        return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
     183      }
     184  }