1  /* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
       2     division algorithm.
       3  
       4     Contributed to the GNU project by Torbjorn Granlund.
       5  
       6     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       7     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       8     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
       9  
      10  Copyright 2007, 2009 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  
      39  #include "gmp-impl.h"
      40  #include "longlong.h"
      41  
      42  mp_limb_t
      43  mpn_sbpi1_div_q (mp_ptr qp,
      44  		 mp_ptr np, mp_size_t nn,
      45  		 mp_srcptr dp, mp_size_t dn,
      46  		 mp_limb_t dinv)
      47  {
      48    mp_limb_t qh;
      49    mp_size_t qn, i;
      50    mp_limb_t n1, n0;
      51    mp_limb_t d1, d0;
      52    mp_limb_t cy, cy1;
      53    mp_limb_t q;
      54    mp_limb_t flag;
      55  
      56    mp_size_t dn_orig = dn;
      57    mp_srcptr dp_orig = dp;
      58    mp_ptr np_orig = np;
      59  
      60    ASSERT (dn > 2);
      61    ASSERT (nn >= dn);
      62    ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
      63  
      64    np += nn;
      65  
      66    qn = nn - dn;
      67    if (qn + 1 < dn)
      68      {
      69        dp += dn - (qn + 1);
      70        dn = qn + 1;
      71      }
      72  
      73    qh = mpn_cmp (np - dn, dp, dn) >= 0;
      74    if (qh != 0)
      75      mpn_sub_n (np - dn, np - dn, dp, dn);
      76  
      77    qp += qn;
      78  
      79    dn -= 2;			/* offset dn by 2 for main division loops,
      80  				   saving two iterations in mpn_submul_1.  */
      81    d1 = dp[dn + 1];
      82    d0 = dp[dn + 0];
      83  
      84    np -= 2;
      85  
      86    n1 = np[1];
      87  
      88    for (i = qn - (dn + 2); i >= 0; i--)
      89      {
      90        np--;
      91        if (UNLIKELY (n1 == d1) && np[1] == d0)
      92  	{
      93  	  q = GMP_NUMB_MASK;
      94  	  mpn_submul_1 (np - dn, dp, dn + 2, q);
      95  	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
      96  	}
      97        else
      98  	{
      99  	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
     100  
     101  	  cy = mpn_submul_1 (np - dn, dp, dn, q);
     102  
     103  	  cy1 = n0 < cy;
     104  	  n0 = (n0 - cy) & GMP_NUMB_MASK;
     105  	  cy = n1 < cy1;
     106  	  n1 -= cy1;
     107  	  np[0] = n0;
     108  
     109  	  if (UNLIKELY (cy != 0))
     110  	    {
     111  	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
     112  	      q--;
     113  	    }
     114  	}
     115  
     116        *--qp = q;
     117      }
     118  
     119    flag = ~CNST_LIMB(0);
     120  
     121    if (dn >= 0)
     122      {
     123        for (i = dn; i > 0; i--)
     124  	{
     125  	  np--;
     126  	  if (UNLIKELY (n1 >= (d1 & flag)))
     127  	    {
     128  	      q = GMP_NUMB_MASK;
     129  	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
     130  
     131  	      if (UNLIKELY (n1 != cy))
     132  		{
     133  		  if (n1 < (cy & flag))
     134  		    {
     135  		      q--;
     136  		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
     137  		    }
     138  		  else
     139  		    flag = 0;
     140  		}
     141  	      n1 = np[1];
     142  	    }
     143  	  else
     144  	    {
     145  	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
     146  
     147  	      cy = mpn_submul_1 (np - dn, dp, dn, q);
     148  
     149  	      cy1 = n0 < cy;
     150  	      n0 = (n0 - cy) & GMP_NUMB_MASK;
     151  	      cy = n1 < cy1;
     152  	      n1 -= cy1;
     153  	      np[0] = n0;
     154  
     155  	      if (UNLIKELY (cy != 0))
     156  		{
     157  		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
     158  		  q--;
     159  		}
     160  	    }
     161  
     162  	  *--qp = q;
     163  
     164  	  /* Truncate operands.  */
     165  	  dn--;
     166  	  dp++;
     167  	}
     168  
     169        np--;
     170        if (UNLIKELY (n1 >= (d1 & flag)))
     171  	{
     172  	  q = GMP_NUMB_MASK;
     173  	  cy = mpn_submul_1 (np, dp, 2, q);
     174  
     175  	  if (UNLIKELY (n1 != cy))
     176  	    {
     177  	      if (n1 < (cy & flag))
     178  		{
     179  		  q--;
     180  		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
     181  		}
     182  	      else
     183  		flag = 0;
     184  	    }
     185  	  n1 = np[1];
     186  	}
     187        else
     188  	{
     189  	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
     190  
     191  	  np[0] = n0;
     192  	  np[1] = n1;
     193  	}
     194  
     195        *--qp = q;
     196      }
     197    ASSERT_ALWAYS (np[1] == n1);
     198    np += 2;
     199  
     200  
     201    dn = dn_orig;
     202    if (UNLIKELY (n1 < (dn & flag)))
     203      {
     204        mp_limb_t q, x;
     205  
     206        /* The quotient may be too large if the remainder is small.  Recompute
     207  	 for above ignored operand parts, until the remainder spills.
     208  
     209  	 FIXME: The quality of this code isn't the same as the code above.
     210  	 1. We don't compute things in an optimal order, high-to-low, in order
     211  	    to terminate as quickly as possible.
     212  	 2. We mess with pointers and sizes, adding and subtracting and
     213  	    adjusting to get things right.  It surely could be streamlined.
     214  	 3. The only termination criteria are that we determine that the
     215  	    quotient needs to be adjusted, or that we have recomputed
     216  	    everything.  We should stop when the remainder is so large
     217  	    that no additional subtracting could make it spill.
     218  	 4. If nothing else, we should not do two loops of submul_1 over the
     219  	    data, instead handle both the triangularization and chopping at
     220  	    once.  */
     221  
     222        x = n1;
     223  
     224        if (dn > 2)
     225  	{
     226  	  /* Compensate for triangularization.  */
     227  	  mp_limb_t y;
     228  
     229  	  dp = dp_orig;
     230  	  if (qn + 1 < dn)
     231  	    {
     232  	      dp += dn - (qn + 1);
     233  	      dn = qn + 1;
     234  	    }
     235  
     236  	  y = np[-2];
     237  
     238  	  for (i = dn - 3; i >= 0; i--)
     239  	    {
     240  	      q = qp[i];
     241  	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
     242  
     243  	      if (y < cy)
     244  		{
     245  		  if (x == 0)
     246  		    {
     247  		      cy = mpn_sub_1 (qp, qp, qn, 1);
     248  		      ASSERT_ALWAYS (cy == 0);
     249  		      return qh - cy;
     250  		    }
     251  		  x--;
     252  		}
     253  	      y -= cy;
     254  	    }
     255  	  np[-2] = y;
     256  	}
     257  
     258        dn = dn_orig;
     259        if (qn + 1 < dn)
     260  	{
     261  	  /* Compensate for ignored dividend and divisor tails.  */
     262  
     263  	  dp = dp_orig;
     264  	  np = np_orig;
     265  
     266  	  if (qh != 0)
     267  	    {
     268  	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
     269  	      if (cy != 0)
     270  		{
     271  		  if (x == 0)
     272  		    {
     273  		      if (qn != 0)
     274  			cy = mpn_sub_1 (qp, qp, qn, 1);
     275  		      return qh - cy;
     276  		    }
     277  		  x--;
     278  		}
     279  	    }
     280  
     281  	  if (qn == 0)
     282  	    return qh;
     283  
     284  	  for (i = dn - qn - 2; i >= 0; i--)
     285  	    {
     286  	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
     287  	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
     288  	      if (cy != 0)
     289  		{
     290  		  if (x == 0)
     291  		    {
     292  		      cy = mpn_sub_1 (qp, qp, qn, 1);
     293  		      return qh;
     294  		    }
     295  		  x--;
     296  		}
     297  	    }
     298  	}
     299      }
     300  
     301    return qh;
     302  }