gmp-6.3.0/mpn/generic/hgcd_appr.c
/* hgcd_appr.c.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
   HGCD_THRESHOLD at the end? */
mp_size_t
mpn_hgcd_appr_itch (mp_size_t n)
{
  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
    return n;
  else
    {
      unsigned k;
      int count;
      mp_size_t nscaled;

      /* Get the recursion depth. */
      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
      count_leading_zeros (count, nscaled);
      k = GMP_LIMB_BITS - count;

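      /* The bound below is 20 * ceil(n/4) + 22 * k + HGCD_THRESHOLD limbs,
         i.e. roughly 5*n limbs plus a fixed 22 limbs per recursion level,
         with HGCD_THRESHOLD limbs on top for the base case (whether that
         last term is really needed is the FIXME above). */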
      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
    }
}
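
/* Illustrative sketch only: a typical caller sizes the temporary space
   with mpn_hgcd_appr_itch, places the matrix storage in front of it,
   and then calls mpn_hgcd_appr, roughly as follows, where tp points to
   enough temporary limbs:

     mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
     struct hgcd_matrix M;

     mpn_hgcd_matrix_init (&M, n, tp);
     if (mpn_hgcd_appr (ap, bp, n, &M, tp + matrix_scratch))
       ... M is the reduction matrix, ap and bp hold the reduced values ...

   The exact setup varies between callers; compare the recursive call
   further down in mpn_hgcd_appr itself. */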

/* Destroys inputs. */
int
mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
               struct hgcd_matrix *M, mp_ptr tp)
{
  mp_size_t s;
  int success = 0;

  ASSERT (n > 0);

  ASSERT ((ap[n-1] | bp[n-1]) != 0);

  if (n <= 2)
    /* Implies s = n. A fairly uninteresting case but exercised by the
       random inputs of the testsuite. */
    return 0;

  ASSERT ((n+1)/2 - 1 < M->alloc);
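  /* I.e. the caller must have initialized M with room for entries of
     roughly n/2 limbs (cf. mpn_hgcd_matrix_init); the matrix entries can
     grow to about that size during the reduction. */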

  /* We aim for a reduction to GMP_NUMB_BITS * s bits. But each time
     we discard some of the least significant limbs, we must keep one
     additional bit to account for the truncation error. We maintain
     GMP_NUMB_BITS * s - extra_bits as the current target size. */
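  /* For instance, with GMP_NUMB_BITS = 64, a target of 190 bits
     corresponds to s = 3 and extra_bits = 2, since 3 * 64 - 2 = 190. */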

  s = n/2 + 1;
  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
    {
      unsigned extra_bits = 0;

      while (n > 2)
        {
          mp_size_t nn;

          ASSERT (n > s);
          ASSERT (n <= 2*s);

          nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
          if (!nn)
            break;

          n = nn;
          success = 1;

          /* We can truncate and discard the lower p bits whenever nbits <=
             2*sbits - p. To account for the truncation error, we must
             adjust

             sbits <-- sbits + 1 - p,

             rather than just sbits <-- sbits - p. This adjustment makes
             the produced matrix slightly smaller than it could be. */
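
          /* In limb terms, the adjustment below is sbits <-- sbits + 1
             - GMP_NUMB_BITS * p, with p now counting limbs: either s is
             bumped and extra_bits set to GMP_NUMB_BITS - 1 (when extra_bits
             was zero), or extra_bits is decremented, and in both cases
             s -= p follows. */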

          if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
            {
              mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;

              if (extra_bits == 0)
                {
                  /* We cross a limb boundary and bump s. We can't do that
                     if the result is that it makes min(U, V) smaller than
                     2^{GMP_NUMB_BITS} s. */
                  if (s + 1 == n
                      || mpn_zero_p (ap + s + 1, n - s - 1)
                      || mpn_zero_p (bp + s + 1, n - s - 1))
                    continue;

                  extra_bits = GMP_NUMB_BITS - 1;
                  s++;
                }
              else
                {
                  extra_bits--;
                }

              /* Drop the p least significant limbs */
              ap += p; bp += p; n -= p; s -= p;
            }
        }

      ASSERT (s > 0);

      if (extra_bits > 0)
        {
          /* We can get here only if we have dropped at least one of the
             least significant bits, so we can decrement ap and bp. We can
             then shift left by extra_bits using mpn_rshift. */
          /* NOTE: In the unlikely case that n is large, it would be preferable
             to do an initial subdiv step to reduce the size before shifting,
             but that would mean duplicating mpn_gcd_subdiv_step with a bit
             count rather than a limb count. */
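          /* Decrementing ap and bp exposes one extra low limb; right-shifting
             {xp+1, n} by GMP_NUMB_BITS - extra_bits, with the shifted-out low
             bits landing in xp[0], amounts to shifting the n-limb value left
             by extra_bits into n + 1 limbs. */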
          ap--; bp--;
          ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
          bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
          n += (ap[n] | bp[n]) > 0;

          ASSERT (success);

          while (n > 2)
            {
              mp_size_t nn;

              ASSERT (n > s);
              ASSERT (n <= 2*s);

              nn = mpn_hgcd_step (n, ap, bp, s, M, tp);

              if (!nn)
                return 1;

              n = nn;
            }
        }

      if (n == 2)
        {
          struct hgcd_matrix1 M1;
          ASSERT (s == 1);

          if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
            {
              /* Multiply M <- M * M1 */
              mpn_hgcd_matrix_mul_1 (M, &M1, tp);
              success = 1;
            }
        }
      return success;
    }
  else
    {
      mp_size_t n2 = (3*n)/4 + 1;
      mp_size_t p = n/2;
      mp_size_t nn;

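      /* Divide-and-conquer case: mpn_hgcd_reduce performs a reduction based
         on the most significant n - p limbs, applying the resulting matrix
         to the full numbers; plain hgcd steps then shrink n to about 3n/4,
         and a final recursive mpn_hgcd_appr call handles the remaining
         high part. */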
      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
      if (nn)
        {
          n = nn;
          /* FIXME: Discard some of the low limbs immediately? */
          success = 1;
        }

      while (n > n2)
        {
          mp_size_t nn;

          /* Needs n + 1 storage */
          nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
          if (!nn)
            return success;

          n = nn;
          success = 1;
        }
      if (n > s + 2)
        {
          struct hgcd_matrix M1;
          mp_size_t scratch;

          p = 2*s - n + 1;
          scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
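
          /* With p = 2*s - n + 1, the recursive call below sees n - p =
             2*(n - s) - 1 limbs and targets about n - s of them, i.e. about
             s + 1 limbs of the full numbers, consistent with this call's
             own target of roughly s limbs. */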

          mpn_hgcd_matrix_init (&M1, n - p, tp);
          if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
            {
              /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
              ASSERT (M->n + 2 >= M1.n);

              /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
                 then either q or q + 1 is a correct quotient, and M1 will
                 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
                 rules out the case that the size of M * M1 is much
                 smaller than the expected M->n + M1->n. */

              ASSERT (M->n + M1.n < M->alloc);

              /* We need a bound for M->n + M1.n. Let n be the original
                 input size. Then

                 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2

                 and it follows that

                 M.n + M1.n <= ceil(n/2) + 1

                 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
                 amount of needed scratch space. */
              mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
              return 1;
            }
        }

      for (;;)
        {
          mp_size_t nn;

          ASSERT (n > s);
          ASSERT (n <= 2*s);

          nn = mpn_hgcd_step (n, ap, bp, s, M, tp);

          if (!nn)
            return success;

          n = nn;
          success = 1;
        }
    }
}