1  /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
       2  
       3  Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
       4  Foundation, Inc.
       5  
       6  This file is part of the GNU MP Library.
       7  
       8  The GNU MP Library is free software; you can redistribute it and/or modify
       9  it under the terms of either:
      10  
      11    * the GNU Lesser General Public License as published by the Free
      12      Software Foundation; either version 3 of the License, or (at your
      13      option) any later version.
      14  
      15  or
      16  
      17    * the GNU General Public License as published by the Free Software
      18      Foundation; either version 2 of the License, or (at your option) any
      19      later version.
      20  
      21  or both in parallel, as here.
      22  
      23  The GNU MP Library is distributed in the hope that it will be useful, but
      24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      26  for more details.
      27  
      28  You should have received copies of the GNU General Public License and the
      29  GNU Lesser General Public License along with the GNU MP Library.  If not,
      30  see https://www.gnu.org/licenses/.  */
      31  
      32  #include "gmp-impl.h"
      33  #include "longlong.h"
      34  
      35  /* Uses the HGCD operation described in
      36  
      37       N. Möller, On Schönhage's algorithm and subquadratic integer gcd
      38       computation, Math. Comp. 77 (2008), 589-607.
      39  
      40    to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
      41    then uses Lehmer's algorithm.
      42  */
      43  
      44  /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
      45   * 2)/3, which gives a balanced multiplication in
      46   * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
      47   * performance. The matrix-vector multiplication is then
      48   * 4:1-unbalanced, with matrix elements of size n/6, and vector
      49   * elements of size p = 2n/3. */
      50  
      51  /* From analysis of the theoretical running time, it appears that when
      52   * multiplication takes time O(n^alpha), p should be chosen so that
      53   * the ratio of the time for the mpn_hgcd call, and the time for the
      54   * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
      55   * 1). */
      56  #ifdef TUNE_GCD_P
      57  #define P_TABLE_SIZE 10000
      58  mp_size_t p_table[P_TABLE_SIZE];
      59  #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
      60  #else
      61  #define CHOOSE_P(n) (2*(n) / 3)
      62  #endif
      63  
      64  struct gcd_ctx
      65  {
      66    mp_ptr gp;
      67    mp_size_t gn;
      68  };
      69  
      70  static void
      71  gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
      72  	  mp_srcptr qp, mp_size_t qn, int d)
      73  {
      74    struct gcd_ctx *ctx = (struct gcd_ctx *) p;
      75    MPN_COPY (ctx->gp, gp, gn);
      76    ctx->gn = gn;
      77  }
      78  
      79  mp_size_t
      80  mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
      81  {
      82    mp_size_t talloc;
      83    mp_size_t scratch;
      84    mp_size_t matrix_scratch;
      85  
      86    struct gcd_ctx ctx;
      87    mp_ptr tp;
      88    TMP_DECL;
      89  
      90    ASSERT (usize >= n);
      91    ASSERT (n > 0);
      92    ASSERT (vp[n-1] > 0);
      93  
      94    /* FIXME: Check for small sizes first, before setting up temporary
      95       storage etc. */
      96    talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
      97  
      98    /* For initial division */
      99    scratch = usize - n + 1;
     100    if (scratch > talloc)
     101      talloc = scratch;
     102  
     103  #if TUNE_GCD_P
     104    if (CHOOSE_P (n) > 0)
     105  #else
     106    if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
     107  #endif
     108      {
     109        mp_size_t hgcd_scratch;
     110        mp_size_t update_scratch;
     111        mp_size_t p = CHOOSE_P (n);
     112        mp_size_t scratch;
     113  #if TUNE_GCD_P
     114        /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
     115  	 is increasing */
     116        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
     117        hgcd_scratch = mpn_hgcd_itch (n);
     118        update_scratch = 2*(n - 1);
     119  #else
     120        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
     121        hgcd_scratch = mpn_hgcd_itch (n - p);
     122        update_scratch = p + n - 1;
     123  #endif
     124        scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
     125        if (scratch > talloc)
     126  	talloc = scratch;
     127      }
     128  
     129    TMP_MARK;
     130    tp = TMP_ALLOC_LIMBS(talloc);
     131  
     132    if (usize > n)
     133      {
     134        mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
     135  
     136        if (mpn_zero_p (up, n))
     137  	{
     138  	  MPN_COPY (gp, vp, n);
     139  	  ctx.gn = n;
     140  	  goto done;
     141  	}
     142      }
     143  
     144    ctx.gp = gp;
     145  
     146  #if TUNE_GCD_P
     147    while (CHOOSE_P (n) > 0)
     148  #else
     149    while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
     150  #endif
     151      {
     152        struct hgcd_matrix M;
     153        mp_size_t p = CHOOSE_P (n);
     154        mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
     155        mp_size_t nn;
     156        mpn_hgcd_matrix_init (&M, n - p, tp);
     157        nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
     158        if (nn > 0)
     159  	{
     160  	  ASSERT (M.n <= (n - p - 1)/2);
     161  	  ASSERT (M.n + p <= (p + n - 1) / 2);
     162  	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
     163  	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
     164  	}
     165        else
     166  	{
     167  	  /* Temporary storage n */
     168  	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
     169  	  if (n == 0)
     170  	    goto done;
     171  	}
     172      }
     173  
     174    while (n > 2)
     175      {
     176        struct hgcd_matrix1 M;
     177        mp_limb_t uh, ul, vh, vl;
     178        mp_limb_t mask;
     179  
     180        mask = up[n-1] | vp[n-1];
     181        ASSERT (mask > 0);
     182  
     183        if (mask & GMP_NUMB_HIGHBIT)
     184  	{
     185  	  uh = up[n-1]; ul = up[n-2];
     186  	  vh = vp[n-1]; vl = vp[n-2];
     187  	}
     188        else
     189  	{
     190  	  int shift;
     191  
     192  	  count_leading_zeros (shift, mask);
     193  	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
     194  	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
     195  	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
     196  	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
     197  	}
     198  
     199        /* Try an mpn_hgcd2 step */
     200        if (mpn_hgcd2 (uh, ul, vh, vl, &M))
     201  	{
     202  	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
     203  	  MP_PTR_SWAP (up, tp);
     204  	}
     205        else
     206  	{
     207  	  /* mpn_hgcd2 has failed. Then either one of a or b is very
     208  	     small, or the difference is very small. Perform one
     209  	     subtraction followed by one division. */
     210  
     211  	  /* Temporary storage n */
     212  	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
     213  	  if (n == 0)
     214  	    goto done;
     215  	}
     216      }
     217  
     218    ASSERT(up[n-1] | vp[n-1]);
     219  
     220    /* Due to the calling convention for mpn_gcd, at most one can be even. */
     221    if ((up[0] & 1) == 0)
     222      MP_PTR_SWAP (up, vp);
     223    ASSERT ((up[0] & 1) != 0);
     224  
     225    {
     226      mp_limb_t u0, u1, v0, v1;
     227      mp_double_limb_t g;
     228  
     229      u0 = up[0];
     230      v0 = vp[0];
     231  
     232      if (n == 1)
     233        {
     234  	int cnt;
     235  	count_trailing_zeros (cnt, v0);
     236  	*gp = mpn_gcd_11 (u0, v0 >> cnt);
     237  	ctx.gn = 1;
     238  	goto done;
     239        }
     240  
     241      v1 = vp[1];
     242      if (UNLIKELY (v0 == 0))
     243        {
     244  	v0 = v1;
     245  	v1 = 0;
     246  	/* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
     247  	   when this situation occurs internally.  */
     248        }
     249      if ((v0 & 1) == 0)
     250        {
     251  	int cnt;
     252  	count_trailing_zeros (cnt, v0);
     253  	v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
     254  	v1 >>= cnt;
     255        }
     256  
     257      u1 = up[1];
     258      g = mpn_gcd_22 (u1, u0, v1, v0);
     259      gp[0] = g.d0;
     260      gp[1] = g.d1;
     261      ctx.gn = 1 + (g.d1 > 0);
     262    }
     263  done:
     264    TMP_FREE;
     265    return ctx.gn;
     266  }