1  /* hgcd2.c
       2  
       3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       6  
       7  Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
       8  Inc.
       9  
      10  This file is part of the GNU MP Library.
      11  
      12  The GNU MP Library is free software; you can redistribute it and/or modify
      13  it under the terms of either:
      14  
      15    * the GNU Lesser General Public License as published by the Free
      16      Software Foundation; either version 3 of the License, or (at your
      17      option) any later version.
      18  
      19  or
      20  
      21    * the GNU General Public License as published by the Free Software
      22      Foundation; either version 2 of the License, or (at your option) any
      23      later version.
      24  
      25  or both in parallel, as here.
      26  
      27  The GNU MP Library is distributed in the hope that it will be useful, but
      28  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      29  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      30  for more details.
      31  
      32  You should have received copies of the GNU General Public License and the
      33  GNU Lesser General Public License along with the GNU MP Library.  If not,
      34  see https://www.gnu.org/licenses/.  */
      35  
      36  #include "gmp-impl.h"
      37  #include "longlong.h"
      38  
      39  #include "mpn/generic/hgcd2-div.h"
      40  
      41  #if GMP_NAIL_BITS != 0
      42  #error Nails not implemented
      43  #endif
      44  
      45  /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
      46     matrix M. Returns 1 if we make progress, i.e. can perform at least
      47     one subtraction. Otherwise returns zero. */
      48  
      49  /* FIXME: Possible optimizations:
      50  
      51     The div2 function starts with checking the most significant bit of
      52     the numerator. We can maintained normalized operands here, call
      53     hgcd with normalized operands only, which should make the code
      54     simpler and possibly faster.
      55  
      56     Experiment with table lookups on the most significant bits.
      57  
      58     This function is also a candidate for assembler implementation.
      59  */
      60  int
      61  mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
      62  	   struct hgcd_matrix1 *M)
      63  {
      64    mp_limb_t u00, u01, u10, u11;
      65  
      66    if (ah < 2 || bh < 2)
      67      return 0;
      68  
      69    if (ah > bh || (ah == bh && al > bl))
      70      {
      71        sub_ddmmss (ah, al, ah, al, bh, bl);
      72        if (ah < 2)
      73  	return 0;
      74  
      75        u00 = u01 = u11 = 1;
      76        u10 = 0;
      77      }
      78    else
      79      {
      80        sub_ddmmss (bh, bl, bh, bl, ah, al);
      81        if (bh < 2)
      82  	return 0;
      83  
      84        u00 = u10 = u11 = 1;
      85        u01 = 0;
      86      }
      87  
      88    if (ah < bh)
      89      goto subtract_a;
      90  
      91    for (;;)
      92      {
      93        ASSERT (ah >= bh);
      94        if (ah == bh)
      95  	goto done;
      96  
      97        if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
      98  	{
      99  	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
     100  	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
     101  
     102  	  break;
     103  	}
     104  
     105        /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
     106  	 1), affecting the second column of M. */
     107        ASSERT (ah > bh);
     108        sub_ddmmss (ah, al, ah, al, bh, bl);
     109  
     110        if (ah < 2)
     111  	goto done;
     112  
     113        if (ah <= bh)
     114  	{
     115  	  /* Use q = 1 */
     116  	  u01 += u00;
     117  	  u11 += u10;
     118  	}
     119        else
     120  	{
     121  	  mp_limb_t r[2];
     122  	  mp_limb_t q = div2 (r, ah, al, bh, bl);
     123  	  al = r[0]; ah = r[1];
     124  	  if (ah < 2)
     125  	    {
     126  	      /* A is too small, but q is correct. */
     127  	      u01 += q * u00;
     128  	      u11 += q * u10;
     129  	      goto done;
     130  	    }
     131  	  q++;
     132  	  u01 += q * u00;
     133  	  u11 += q * u10;
     134  	}
     135      subtract_a:
     136        ASSERT (bh >= ah);
     137        if (ah == bh)
     138  	goto done;
     139  
     140        if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
     141  	{
     142  	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
     143  	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
     144  
     145  	  goto subtract_a1;
     146  	}
     147  
     148        /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
     149  	 1), affecting the first column of M. */
     150        sub_ddmmss (bh, bl, bh, bl, ah, al);
     151  
     152        if (bh < 2)
     153  	goto done;
     154  
     155        if (bh <= ah)
     156  	{
     157  	  /* Use q = 1 */
     158  	  u00 += u01;
     159  	  u10 += u11;
     160  	}
     161        else
     162  	{
     163  	  mp_limb_t r[2];
     164  	  mp_limb_t q = div2 (r, bh, bl, ah, al);
     165  	  bl = r[0]; bh = r[1];
     166  	  if (bh < 2)
     167  	    {
     168  	      /* B is too small, but q is correct. */
     169  	      u00 += q * u01;
     170  	      u10 += q * u11;
     171  	      goto done;
     172  	    }
     173  	  q++;
     174  	  u00 += q * u01;
     175  	  u10 += q * u11;
     176  	}
     177      }
     178  
     179    /* NOTE: Since we discard the least significant half limb, we don't get a
     180       truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS +1}). */
     181    /* Single precision loop */
     182    for (;;)
     183      {
     184        ASSERT (ah >= bh);
     185  
     186        ah -= bh;
     187        if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
     188  	break;
     189  
     190        if (ah <= bh)
     191  	{
     192  	  /* Use q = 1 */
     193  	  u01 += u00;
     194  	  u11 += u10;
     195  	}
     196        else
     197  	{
     198  	  mp_double_limb_t rq = div1 (ah, bh);
     199  	  mp_limb_t q = rq.d1;
     200  	  ah = rq.d0;
     201  
     202  	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
     203  	    {
     204  	      /* A is too small, but q is correct. */
     205  	      u01 += q * u00;
     206  	      u11 += q * u10;
     207  	      break;
     208  	    }
     209  	  q++;
     210  	  u01 += q * u00;
     211  	  u11 += q * u10;
     212  	}
     213      subtract_a1:
     214        ASSERT (bh >= ah);
     215  
     216        bh -= ah;
     217        if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
     218  	break;
     219  
     220        if (bh <= ah)
     221  	{
     222  	  /* Use q = 1 */
     223  	  u00 += u01;
     224  	  u10 += u11;
     225  	}
     226        else
     227  	{
     228  	  mp_double_limb_t rq = div1 (bh, ah);
     229  	  mp_limb_t q = rq.d1;
     230  	  bh = rq.d0;
     231  
     232  	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
     233  	    {
     234  	      /* B is too small, but q is correct. */
     235  	      u00 += q * u01;
     236  	      u10 += q * u11;
     237  	      break;
     238  	    }
     239  	  q++;
     240  	  u00 += q * u01;
     241  	  u10 += q * u11;
     242  	}
     243      }
     244  
     245   done:
     246    M->u[0][0] = u00; M->u[0][1] = u01;
     247    M->u[1][0] = u10; M->u[1][1] = u11;
     248  
     249    return 1;
     250  }
     251  
     252  /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
     253   * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
     254  mp_size_t
     255  mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
     256  			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
     257  {
     258    mp_limb_t ah, bh;
     259  
     260    /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
     261  
     262       r  = u00 * a
     263       r += u10 * b
     264       b *= u11
     265       b += u01 * a
     266    */
     267  
     268  #if HAVE_NATIVE_mpn_addaddmul_1msb0
     269    ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
     270    bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
     271  #else
     272    ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
     273    ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
     274  
     275    bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
     276    bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
     277  #endif
     278    rp[n] = ah;
     279    bp[n] = bh;
     280  
     281    n += (ah | bh) > 0;
     282    return n;
     283  }