1  /* mpn_gcdext -- Extended Greatest Common Divisor.
       2  
       3  Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
       4  
       5  This file is part of the GNU MP Library.
       6  
       7  The GNU MP Library is free software; you can redistribute it and/or modify
       8  it under the terms of either:
       9  
      10    * the GNU Lesser General Public License as published by the Free
      11      Software Foundation; either version 3 of the License, or (at your
      12      option) any later version.
      13  
      14  or
      15  
      16    * the GNU General Public License as published by the Free Software
      17      Foundation; either version 2 of the License, or (at your option) any
      18      later version.
      19  
      20  or both in parallel, as here.
      21  
      22  The GNU MP Library is distributed in the hope that it will be useful, but
      23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      25  for more details.
      26  
      27  You should have received copies of the GNU General Public License and the
      28  GNU Lesser General Public License along with the GNU MP Library.  If not,
      29  see https://www.gnu.org/licenses/.  */
      30  
      31  #include "gmp-impl.h"
      32  #include "longlong.h"
      33  
      34  #ifndef GCDEXT_1_USE_BINARY
      35  #define GCDEXT_1_USE_BINARY 0
      36  #endif
      37  
      38  #ifndef GCDEXT_1_BINARY_METHOD
      39  #define GCDEXT_1_BINARY_METHOD 2
      40  #endif
      41  
      42  #if GCDEXT_1_USE_BINARY
      43  
      44  mp_limb_t
      45  mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
      46  	      mp_limb_t u, mp_limb_t v)
      47  {
      48    /* Maintain
      49  
      50       U = t1 u + t0 v
      51       V = s1 u + s0 v
      52  
      53       where U, V are the inputs (without any shared power of two),
      54       and the matrix has determinant � 2^{shift}.
      55    */
      56    mp_limb_t s0 = 1;
      57    mp_limb_t t0 = 0;
      58    mp_limb_t s1 = 0;
      59    mp_limb_t t1 = 1;
      60    mp_limb_t ug;
      61    mp_limb_t vg;
      62    mp_limb_t ugh;
      63    mp_limb_t vgh;
      64    unsigned zero_bits;
      65    unsigned shift;
      66    unsigned i;
      67  #if GCDEXT_1_BINARY_METHOD == 2
      68    mp_limb_t det_sign;
      69  #endif
      70  
      71    ASSERT (u > 0);
      72    ASSERT (v > 0);
      73  
      74    count_trailing_zeros (zero_bits, u | v);
      75    u >>= zero_bits;
      76    v >>= zero_bits;
      77  
      78    if ((u & 1) == 0)
      79      {
      80        count_trailing_zeros (shift, u);
      81        u >>= shift;
      82        t1 <<= shift;
      83      }
      84    else if ((v & 1) == 0)
      85      {
      86        count_trailing_zeros (shift, v);
      87        v >>= shift;
      88        s0 <<= shift;
      89      }
      90    else
      91      shift = 0;
      92  
      93  #if GCDEXT_1_BINARY_METHOD == 1
      94    while (u != v)
      95      {
      96        unsigned count;
      97        if (u > v)
      98  	{
      99  	  u -= v;
     100  
     101  	  count_trailing_zeros (count, u);
     102  	  u >>= count;
     103  
     104  	  t0 += t1; t1 <<= count;
     105  	  s0 += s1; s1 <<= count;
     106  	}
     107        else
     108  	{
     109  	  v -= u;
     110  
     111  	  count_trailing_zeros (count, v);
     112  	  v >>= count;
     113  
     114  	  t1 += t0; t0 <<= count;
     115  	  s1 += s0; s0 <<= count;
     116  	}
     117        shift += count;
     118      }
     119  #else
     120  # if GCDEXT_1_BINARY_METHOD == 2
     121    u >>= 1;
     122    v >>= 1;
     123  
     124    det_sign = 0;
     125  
     126    while (u != v)
     127      {
     128        unsigned count;
     129        mp_limb_t d =  u - v;
     130        mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
     131        mp_limb_t sx;
     132        mp_limb_t tx;
     133  
     134        /* When v <= u (vgtu == 0), the updates are:
     135  
     136  	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
     137  	   (t1, t0) <-- (t1 << count, t0 + t1)
     138  
     139  	 and when v > 0, the updates are
     140  
     141  	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
     142  	   (t1, t0) <-- (t0 << count, t0 + t1)
     143  
     144  	 and similarly for s1, s0
     145        */
     146  
     147        /* v <-- min (u, v) */
     148        v += (vgtu & d);
     149  
     150        /* u <-- |u - v| */
     151        u = (d ^ vgtu) - vgtu;
     152  
     153        /* Number of trailing zeros is the same no matter if we look at
     154         * d or u, but using d gives more parallelism. */
     155        count_trailing_zeros (count, d);
     156  
     157        det_sign ^= vgtu;
     158  
     159        tx = vgtu & (t0 - t1);
     160        sx = vgtu & (s0 - s1);
     161        t0 += t1;
     162        s0 += s1;
     163        t1 += tx;
     164        s1 += sx;
     165  
     166        count++;
     167        u >>= count;
     168        t1 <<= count;
     169        s1 <<= count;
     170        shift += count;
     171      }
     172    u = (u << 1) + 1;
     173  # else /* GCDEXT_1_BINARY_METHOD == 2 */
     174  #  error Unknown GCDEXT_1_BINARY_METHOD
     175  # endif
     176  #endif
     177  
     178    /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
     179    ug = t0 + t1;
     180    vg = s0 + s1;
     181  
     182    ugh = ug/2 + (ug & 1);
     183    vgh = vg/2 + (vg & 1);
     184  
     185    /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
     186       s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
     187    for (i = 0; i < shift; i++)
     188      {
     189        mp_limb_t mask = - ( (s0 | t0) & 1);
     190  
     191        s0 /= 2;
     192        t0 /= 2;
     193        s0 += mask & vgh;
     194        t0 += mask & ugh;
     195      }
     196  
     197    ASSERT_ALWAYS (s0 <= vg);
     198    ASSERT_ALWAYS (t0 <= ug);
     199  
     200    if (s0 > vg - s0)
     201      {
     202        s0 -= vg;
     203        t0 -= ug;
     204      }
     205  #if GCDEXT_1_BINARY_METHOD == 2
     206    /* Conditional negation. */
     207    s0 = (s0 ^ det_sign) - det_sign;
     208    t0 = (t0 ^ det_sign) - det_sign;
     209  #endif
     210    *sp = s0;
     211    *tp = -t0;
     212  
     213    return u << zero_bits;
     214  }
     215  
     216  #else /* !GCDEXT_1_USE_BINARY */
     217  
     218  
     219  /* FIXME: Takes two single-word limbs. It could be extended to a
     220   * function that accepts a bignum for the first input, and only
     221   * returns the first co-factor. */
     222  
     223  mp_limb_t
     224  mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
     225  	      mp_limb_t a, mp_limb_t b)
     226  {
     227    /* Maintain
     228  
     229       a =  u0 A + v0 B
     230       b =  u1 A + v1 B
     231  
     232       where A, B are the original inputs.
     233    */
     234    mp_limb_signed_t u0 = 1;
     235    mp_limb_signed_t v0 = 0;
     236    mp_limb_signed_t u1 = 0;
     237    mp_limb_signed_t v1 = 1;
     238  
     239    ASSERT (a > 0);
     240    ASSERT (b > 0);
     241  
     242    if (a < b)
     243      goto divide_by_b;
     244  
     245    for (;;)
     246      {
     247        mp_limb_t q;
     248  
     249        q = a / b;
     250        a -= q * b;
     251  
     252        if (a == 0)
     253  	{
     254  	  *up = u1;
     255  	  *vp = v1;
     256  	  return b;
     257  	}
     258        u0 -= q * u1;
     259        v0 -= q * v1;
     260  
     261      divide_by_b:
     262        q = b / a;
     263        b -= q * a;
     264  
     265        if (b == 0)
     266  	{
     267  	  *up = u0;
     268  	  *vp = v0;
     269  	  return a;
     270  	}
     271        u1 -= q * u0;
     272        v1 -= q * v0;
     273      }
     274  }
     275  #endif /* !GCDEXT_1_USE_BINARY */