1  /* jacobi.c
       2  
       3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       6  
       7  Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
       8  Inc.
       9  
      10  This file is part of the GNU MP Library.
      11  
      12  The GNU MP Library is free software; you can redistribute it and/or modify
      13  it under the terms of either:
      14  
      15    * the GNU Lesser General Public License as published by the Free
      16      Software Foundation; either version 3 of the License, or (at your
      17      option) any later version.
      18  
      19  or
      20  
      21    * the GNU General Public License as published by the Free Software
      22      Foundation; either version 2 of the License, or (at your option) any
      23      later version.
      24  
      25  or both in parallel, as here.
      26  
      27  The GNU MP Library is distributed in the hope that it will be useful, but
      28  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      29  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      30  for more details.
      31  
      32  You should have received copies of the GNU General Public License and the
      33  GNU Lesser General Public License along with the GNU MP Library.  If not,
      34  see https://www.gnu.org/licenses/.  */
      35  
      36  #include "gmp-impl.h"
      37  #include "longlong.h"
      38  
      39  #ifndef JACOBI_DC_THRESHOLD
      40  #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
      41  #endif
      42  
      43  /* Schönhage's rules:
      44   *
      45   * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
      46   *
      47   * If r1 is odd, then
      48   *
      49   *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
      50   *
      51   * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
      52   *
      53   * If r1 is even, r2 must be odd. We have
      54   *
      55   *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
      56   *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
      57   *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
      58   *
      59   * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
      60   * q1 times gives
      61   *
      62   *   (r1 | r0) = (r1 | r2) = (r3 | r2)
      63   *
      64   * On the other hand, if r1 = 2 (mod 4), the sign factor is
      65   * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
      66   *
      67   *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
      68   *   = q1 (r0-1)/2 + q1 (q1-1)/2
      69   *
      70   * and we can summarize the even case as
      71   *
      72   *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
      73   *
      74   * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
      75   *
      76   * What about termination? The remainder sequence ends with (0|1) = 1
      77   * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
      78   * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
      79   * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
      80   *
      81   * Examples: (11|15) = - (15|11) = - (4|11)
      82   *            (4|11) =    (4| 3) =   (1| 3)
      83   *            (1| 3) = (3|1) = (0|1) = 1
      84   *
      85   *             (2|7) = (2|1) = (0|1) = 1
      86   *
      87   * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
      88   *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
      89   *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
      90   *
      91   */
      92  
      93  /* In principle, the state consists of four variables: e (one bit), a,
      94     b (two bits each), d (one bit). Collected factors are (-1)^e. a and
      95     b are the least significant bits of the current remainders. d
      96     (denominator) is 0 if we're currently subtracting multiplies of a
      97     from b, and 1 if we're subtracting b from a.
      98  
      99     e is stored in the least significant bit, while a, b and d are
     100     coded as only 13 distinct values in bits 1-4, according to the
     101     following table. For rows not mentioning d, the value is either
     102     implied, or it doesn't matter. */
     103  
     104  #if WANT_ASSERT
     105  static const struct
     106  {
     107    unsigned char a;
     108    unsigned char b;
     109  } decode_table[13] = {
     110    /*  0 */ { 0, 1 },
     111    /*  1 */ { 0, 3 },
     112    /*  2 */ { 1, 1 },
     113    /*  3 */ { 1, 3 },
     114    /*  4 */ { 2, 1 },
     115    /*  5 */ { 2, 3 },
     116    /*  6 */ { 3, 1 },
     117    /*  7 */ { 3, 3 }, /* d = 1 */
     118    /*  8 */ { 1, 0 },
     119    /*  9 */ { 1, 2 },
     120    /* 10 */ { 3, 0 },
     121    /* 11 */ { 3, 2 },
     122    /* 12 */ { 3, 3 }, /* d = 0 */
     123  };
     124  #define JACOBI_A(bits) (decode_table[(bits)>>1].a)
     125  #define JACOBI_B(bits) (decode_table[(bits)>>1].b)
     126  #endif /* WANT_ASSERT */
     127  
     128  const unsigned char jacobi_table[208] = {
     129  #include "jacobitab.h"
     130  };
     131  
     132  #define BITS_FAIL 31
     133  
     134  static void
     135  jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
     136  	     mp_srcptr qp, mp_size_t qn, int d)
     137  {
     138    unsigned *bitsp = (unsigned *) p;
     139  
     140    if (gp)
     141      {
     142        ASSERT (gn > 0);
     143        if (gn != 1 || gp[0] != 1)
     144  	{
     145  	  *bitsp = BITS_FAIL;
     146  	  return;
     147  	}
     148      }
     149  
     150    if (qp)
     151      {
     152        ASSERT (qn > 0);
     153        ASSERT (d >= 0);
     154        *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
     155      }
     156  }
     157  
     158  #define CHOOSE_P(n) (2*(n) / 3)
     159  
     160  int
     161  mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
     162  {
     163    mp_size_t scratch;
     164    mp_size_t matrix_scratch;
     165    mp_ptr tp;
     166  
     167    TMP_DECL;
     168  
     169    ASSERT (n > 0);
     170    ASSERT ( (ap[n-1] | bp[n-1]) > 0);
     171    ASSERT ( (bp[0] | ap[0]) & 1);
     172  
     173    /* FIXME: Check for small sizes first, before setting up temporary
     174       storage etc. */
     175    scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
     176  
     177    if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
     178      {
     179        mp_size_t hgcd_scratch;
     180        mp_size_t update_scratch;
     181        mp_size_t p = CHOOSE_P (n);
     182        mp_size_t dc_scratch;
     183  
     184        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
     185        hgcd_scratch = mpn_hgcd_itch (n - p);
     186        update_scratch = p + n - 1;
     187  
     188        dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
     189        if (dc_scratch > scratch)
     190  	scratch = dc_scratch;
     191      }
     192  
     193    TMP_MARK;
     194    tp = TMP_ALLOC_LIMBS(scratch);
     195  
     196    while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
     197      {
     198        struct hgcd_matrix M;
     199        mp_size_t p = 2*n/3;
     200        mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
     201        mp_size_t nn;
     202        mpn_hgcd_matrix_init (&M, n - p, tp);
     203  
     204        nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
     205  			    tp + matrix_scratch);
     206        if (nn > 0)
     207  	{
     208  	  ASSERT (M.n <= (n - p - 1)/2);
     209  	  ASSERT (M.n + p <= (p + n - 1) / 2);
     210  	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
     211  	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
     212  	}
     213        else
     214  	{
     215  	  /* Temporary storage n */
     216  	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
     217  	  if (!n)
     218  	    {
     219  	      TMP_FREE;
     220  	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
     221  	    }
     222  	}
     223      }
     224  
     225    while (n > 2)
     226      {
     227        struct hgcd_matrix1 M;
     228        mp_limb_t ah, al, bh, bl;
     229        mp_limb_t mask;
     230  
     231        mask = ap[n-1] | bp[n-1];
     232        ASSERT (mask > 0);
     233  
     234        if (mask & GMP_NUMB_HIGHBIT)
     235  	{
     236  	  ah = ap[n-1]; al = ap[n-2];
     237  	  bh = bp[n-1]; bl = bp[n-2];
     238  	}
     239        else
     240  	{
     241  	  int shift;
     242  
     243  	  count_leading_zeros (shift, mask);
     244  	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
     245  	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
     246  	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
     247  	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
     248  	}
     249  
     250        /* Try an mpn_nhgcd2 step */
     251        if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
     252  	{
     253  	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
     254  	  MP_PTR_SWAP (ap, tp);
     255  	}
     256        else
     257  	{
     258  	  /* mpn_hgcd2 has failed. Then either one of a or b is very
     259  	     small, or the difference is very small. Perform one
     260  	     subtraction followed by one division. */
     261  	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
     262  	  if (!n)
     263  	    {
     264  	      TMP_FREE;
     265  	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
     266  	    }
     267  	}
     268      }
     269  
     270    if (bits >= 16)
     271      MP_PTR_SWAP (ap, bp);
     272  
     273    ASSERT (bp[0] & 1);
     274  
     275    if (n == 1)
     276      {
     277        mp_limb_t al, bl;
     278        al = ap[0];
     279        bl = bp[0];
     280  
     281        TMP_FREE;
     282        if (bl == 1)
     283  	return 1 - 2*(bits & 1);
     284        else
     285  	return mpn_jacobi_base (al, bl, bits << 1);
     286      }
     287  
     288    else
     289      {
     290        int res = mpn_jacobi_2 (ap, bp, bits & 1);
     291        TMP_FREE;
     292        return res;
     293      }
     294  }