(root)/
gmp-6.3.0/
mpn/
generic/
div_q.c
       1  /* mpn_div_q -- division for arbitrary size operands.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
       8  
       9  Copyright 2009, 2010, 2015, 2018 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  #include "gmp-impl.h"
      38  #include "longlong.h"
      39  
      40  
      41  /* Compute Q = N/D with truncation.
      42       N = {np,nn}
      43       D = {dp,dn}
      44       Q = {qp,nn-dn+1}
      45       T = {scratch,nn+1} is scratch space
      46     N and D are both untouched by the computation.
      47     N and T may overlap; pass the same space if N is irrelevant after the call,
      48     but note that tp needs an extra limb.
      49  
      50     Operand requirements:
      51       N >= D > 0
      52       dp[dn-1] != 0
      53       No overlap between the N, D, and Q areas.
      54  
      55     This division function does not clobber its input operands, since it is
      56     intended to support average-O(qn) division, and for that to be effective, it
      57     cannot put requirements on callers to copy a O(nn) operand.
      58  
      59     If a caller does not care about the value of {np,nn+1} after calling this
      60     function, it should pass np also for the scratch argument.  This function
      61     will then save some time and space by avoiding allocation and copying.
      62     (FIXME: Is this a good design?  We only really save any copying for
      63     already-normalised divisors, which should be rare.  It also prevents us from
      64     reasonably asking for all scratch space we need.)
      65  
      66     We write nn-dn+1 limbs for the quotient, but return void.  Why not return
      67     the most significant quotient limb?  Look at the 4 main code blocks below
      68     (consisting of an outer if-else where each arm contains an if-else). It is
      69     tricky for the first code block, since the mpn_*_div_q calls will typically
      70     generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
      71     we generate the most significant quotient limb here, before calling
      72     mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
      73     critical division case (the SB sub-case in particular) copying is not a good
      74     idea.
      75  
      76     It might make sense to split the if-else parts of the (qn + FUDGE
      77     >= dn) blocks into separate functions, since we could promise quite
      78     different things to callers in these two cases.  The 'then' case
      79     benefits from np=scratch, and it could perhaps even tolerate qp=np,
      80     saving some headache for many callers.
      81  
      82     FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
      83     operands, we do not reuse the huge scratch for adjustments.  This can be a
      84     serious waste of memory for the largest operands.
      85  */
      86  
/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5			/* FIXME: tune this */

/* Exact division reuses the tuned thresholds of the corresponding
   approximate-division (divappr) routines, since the algorithm selection
   below mirrors theirs.  */
#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
/* Fallback when no tuned MUPI divappr threshold exists.  Defining it after
   the reference above is fine: macro expansion is deferred to the point of
   use, not the point of this #define.  */
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif
      98  
/* Compute the truncated quotient {qp, nn-dn+1} = {np,nn} / {dp,dn}.
   See the block comment above for the full contract (operand requirements,
   scratch conventions, and why no high quotient limb is returned).  */
void
mpn_div_q (mp_ptr qp,
	   mp_srcptr np, mp_size_t nn,
	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  /* Enforce the documented operand requirements (debug builds only).  */
  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  dh = dp[dn - 1];
  if (dn == 1)
    {
      /* Single-limb divisor: hand off directly; remainder is discarded.  */
      mpn_divrem_1 (qp, 0L, np, nn, dh);
      return;
    }

  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* Quotient is not much smaller than the divisor: divide using the
	 full operands.
	 |________________________|
			     |_______|  */
      new_np = scratch;

      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Divisor not normalised: shift both N and D left so that D's
	     most significant bit is set, as the pi1 division routines
	     require.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np, nn, cnt);
	  new_np[nn] = cy;
	  /* Count the spilled high limb only when it is nonzero.  */
	  new_nn = nn + (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (dn);
	  mpn_lshift (new_dp, dp, dn, cnt);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      /* Schoolbook range: precompute the 3/2 inverse of D's top
		 two limbs.  */
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      /* Divide-and-conquer range; the "slow condition" interpolates
		 between the MUPI and MU thresholds as a function of dn/nn.  */
	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
	    }
	  else
	    {
	      /* Block-wise Barrett (MU) division for the largest operands.  */
	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
	    }
	  /* If the left shift did not spill (cy == 0), the division wrote
	     only qn-1 limbs and qh is the top quotient limb; otherwise the
	     dividend grew by one limb, all qn limbs were written, and qh
	     must be zero.  */
	  if (cy == 0)
	    qp[qn - 1] = qh;
	  else
	    ASSERT (qh == 0);
	}
      else  /* divisor is already normalised */
	{
	  /* The pi1 routines clobber the dividend area, so work on a copy
	     unless the caller passed np as scratch.  */
	  if (new_np != np)
	    MPN_COPY (new_np, np, nn);

	  if (dn == 2)
	    {
	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
	    }
	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
	    {
	      invert_pi1 (dinv, dh, dp[dn - 2]);
	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
	    }
	  else
	    {
	      /* MU division does not clobber N, so it reads np directly;
		 the MPN_COPY above was unnecessary on this path (noted in
		 the sibling branch below as well).  */
	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
	    }
	  qp[nn - dn] = qh;
	}
    }
  else
    {
      /* Quotient is much smaller than the divisor: compute an approximate
	 quotient from the top 2*qn+1 limbs of N and the top qn+1 limbs of
	 D, then adjust it by at most one.
	 |________________________|
		 |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
	/* We need {np,nn} to remain untouched until the final adjustment, so
	   we need to allocate separate space for new_np.  */
	new_np = TMP_ALLOC_LIMBS (new_nn + 1);	/* +1 for the shift spill */


      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
	{
	  /* Normalise the truncated operands by a left shift.  */
	  count_leading_zeros (cnt, dh);

	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
	  new_np[new_nn] = cy;

	  new_nn += (cy != 0);

	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
	  /* Fill the vacated low bits of the truncated divisor from the
	     next lower limb of D.  */
	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

	  if (qn + 1 == 2)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  if (cy == 0)
	    tp[qn] = qh;
	  else if (UNLIKELY (qh != 0))
	    {
	      /* This happens only when the quotient is close to B^n and
		 mpn_*_divappr_q returned B^n.  Clamp the approximate
		 quotient to all-ones limbs.  */
	      mp_size_t i, n;
	      n = new_nn - (qn + 1);
	      for (i = 0; i < n; i++)
		tp[i] = GMP_NUMB_MAX;
	      qh = 0;		/* currently ignored */
	    }
	}
      else  /* divisor is already normalised */
	{
	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

	  /* Cast away const: new_dp is only read below.  */
	  new_dp = (mp_ptr) dp + dn - (qn + 1);

	  if (qn == 2 - 1)
	    {
	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
	    }
	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
	    }
	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
	    {
	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
	    }
	  else
	    {
	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
	    }
	  tp[qn] = qh;
	}

      /* The approximate quotient is {tp, qn+1}; its low limb is a guard
	 limb that only absorbs the approximation error.  Keep the top qn
	 limbs.  */
      MPN_COPY (qp, tp + 1, qn);
      if (tp[0] <= 4)
        {
	  /* Guard limb nearly zero: the approximation may be one too
	     large (the divappr error bound is presumably what makes 4 the
	     right cutoff here -- NOTE(review): confirm against the
	     mpn_*_divappr_q contract).  Check exactly by multiplying back
	     and comparing against N, decrementing Q if Q*D > N.  */
	  mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
	  rn = dn + qn;
	  rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            MPN_DECR_U (qp, qn, 1);
        }
    }

  TMP_FREE;
}