1  /* hgcd_jacobi.c.
       2  
       3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       6  
       7  Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
       8  
       9  This file is part of the GNU MP Library.
      10  
      11  The GNU MP Library is free software; you can redistribute it and/or modify
      12  it under the terms of either:
      13  
      14    * the GNU Lesser General Public License as published by the Free
      15      Software Foundation; either version 3 of the License, or (at your
      16      option) any later version.
      17  
      18  or
      19  
      20    * the GNU General Public License as published by the Free Software
      21      Foundation; either version 2 of the License, or (at your option) any
      22      later version.
      23  
      24  or both in parallel, as here.
      25  
      26  The GNU MP Library is distributed in the hope that it will be useful, but
      27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      29  for more details.
      30  
      31  You should have received copies of the GNU General Public License and the
      32  GNU Lesser General Public License along with the GNU MP Library.  If not,
      33  see https://www.gnu.org/licenses/.  */
      34  
      35  #include "gmp-impl.h"
      36  #include "longlong.h"
      37  
      38  /* This file is almost a copy of hgcd.c, with some added calls to
      39     mpn_jacobi_update */
      40  
      41  struct hgcd_jacobi_ctx
      42  {
      43    struct hgcd_matrix *M;
      44    unsigned *bitsp;
      45  };
      46  
      47  static void
      48  hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
      49  		  mp_srcptr qp, mp_size_t qn, int d)
      50  {
      51    ASSERT (!gp);
      52    ASSERT (d >= 0);
      53  
      54    MPN_NORMALIZE (qp, qn);
      55    if (qn > 0)
      56      {
      57        struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
      58        /* NOTES: This is a bit ugly. A tp area is passed to
      59  	 gcd_subdiv_step, which stores q at the start of that area. We
      60  	 now use the rest. */
      61        mp_ptr tp = (mp_ptr) qp + qn;
      62  
      63        mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
      64        *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
      65      }
      66  }
      67  
      68  /* Perform a few steps, using some of mpn_hgcd2, subtraction and
      69     division. Reduces the size by almost one limb or more, but never
      70     below the given size s. Return new size for a and b, or 0 if no
      71     more steps are possible.
      72  
      73     If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
      74     limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
      75     fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
      76     hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
      77     resulting size of M.
      78  
      79     If N is the input size to the calling hgcd, then s = floor(N/2) +
      80     1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
      81     < N, so N is sufficient.
      82  */
      83  
      84  static mp_size_t
      85  hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
      86  		  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
      87  {
      88    struct hgcd_matrix1 M1;
      89    mp_limb_t mask;
      90    mp_limb_t ah, al, bh, bl;
      91  
      92    ASSERT (n > s);
      93  
      94    mask = ap[n-1] | bp[n-1];
      95    ASSERT (mask > 0);
      96  
      97    if (n == s + 1)
      98      {
      99        if (mask < 4)
     100  	goto subtract;
     101  
     102        ah = ap[n-1]; al = ap[n-2];
     103        bh = bp[n-1]; bl = bp[n-2];
     104      }
     105    else if (mask & GMP_NUMB_HIGHBIT)
     106      {
     107        ah = ap[n-1]; al = ap[n-2];
     108        bh = bp[n-1]; bl = bp[n-2];
     109      }
     110    else
     111      {
     112        int shift;
     113  
     114        count_leading_zeros (shift, mask);
     115        ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
     116        al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
     117        bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
     118        bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
     119      }
     120  
     121    /* Try an mpn_hgcd2 step */
     122    if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
     123      {
     124        /* Multiply M <- M * M1 */
     125        mpn_hgcd_matrix_mul_1 (M, &M1, tp);
     126  
     127        /* Can't swap inputs, so we need to copy. */
     128        MPN_COPY (tp, ap, n);
     129        /* Multiply M1^{-1} (a;b) */
     130        return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
     131      }
     132  
     133   subtract:
     134    {
     135      struct hgcd_jacobi_ctx ctx;
     136      ctx.M = M;
     137      ctx.bitsp = bitsp;
     138  
     139      return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
     140    }
     141  }
     142  
     143  /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
     144     with elements of size at most (n+1)/2 - 1. Returns new size of a,
     145     b, or zero if no reduction is possible. */
     146  
     147  /* Same scratch requirements as for mpn_hgcd. */
     148  mp_size_t
     149  mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
     150  		 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
     151  {
     152    mp_size_t s = n/2 + 1;
     153  
     154    mp_size_t nn;
     155    int success = 0;
     156  
     157    if (n <= s)
     158      /* Happens when n <= 2, a fairly uninteresting case but exercised
     159         by the random inputs of the testsuite. */
     160      return 0;
     161  
     162    ASSERT ((ap[n-1] | bp[n-1]) > 0);
     163  
     164    ASSERT ((n+1)/2 - 1 < M->alloc);
     165  
     166    if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
     167      {
     168        mp_size_t n2 = (3*n)/4 + 1;
     169        mp_size_t p = n/2;
     170  
     171        nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
     172        if (nn > 0)
     173  	{
     174  	  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
     175  	     = 2 (n - 1) */
     176  	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
     177  	  success = 1;
     178  	}
     179        while (n > n2)
     180  	{
     181  	  /* Needs n + 1 storage */
     182  	  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
     183  	  if (!nn)
     184  	    return success ? n : 0;
     185  	  n = nn;
     186  	  success = 1;
     187  	}
     188  
     189        if (n > s + 2)
     190  	{
     191  	  struct hgcd_matrix M1;
     192  	  mp_size_t scratch;
     193  
     194  	  p = 2*s - n + 1;
     195  	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
     196  
     197  	  mpn_hgcd_matrix_init(&M1, n - p, tp);
     198  	  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
     199  	  if (nn > 0)
     200  	    {
     201  	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
     202  	      ASSERT (M->n + 2 >= M1.n);
     203  
     204  	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
     205  		 then either q or q + 1 is a correct quotient, and M1 will
     206  		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
     207  		 rules out the case that the size of M * M1 is much
     208  		 smaller than the expected M->n + M1->n. */
     209  
     210  	      ASSERT (M->n + M1.n < M->alloc);
     211  
     212  	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
     213  		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
     214  	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
     215  
     216  	      /* We need a bound for of M->n + M1.n. Let n be the original
     217  		 input size. Then
     218  
     219  		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
     220  
     221  		 and it follows that
     222  
     223  		 M.n + M1.n <= ceil(n/2) + 1
     224  
     225  		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
     226  		 amount of needed scratch space. */
     227  	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
     228  	      success = 1;
     229  	    }
     230  	}
     231      }
     232  
     233    for (;;)
     234      {
     235        /* Needs s+3 < n */
     236        nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
     237        if (!nn)
     238  	return success ? n : 0;
     239  
     240        n = nn;
     241        success = 1;
     242      }
     243  }