1  /* hgcd_matrix.c.
       2  
       3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       6  
       7  Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
       8  
       9  This file is part of the GNU MP Library.
      10  
      11  The GNU MP Library is free software; you can redistribute it and/or modify
      12  it under the terms of either:
      13  
      14    * the GNU Lesser General Public License as published by the Free
      15      Software Foundation; either version 3 of the License, or (at your
      16      option) any later version.
      17  
      18  or
      19  
      20    * the GNU General Public License as published by the Free Software
      21      Foundation; either version 2 of the License, or (at your option) any
      22      later version.
      23  
      24  or both in parallel, as here.
      25  
      26  The GNU MP Library is distributed in the hope that it will be useful, but
      27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      29  for more details.
      30  
      31  You should have received copies of the GNU General Public License and the
      32  GNU Lesser General Public License along with the GNU MP Library.  If not,
      33  see https://www.gnu.org/licenses/.  */
      34  
      35  #include "gmp-impl.h"
      36  #include "longlong.h"
      37  
      38  /* For input of size n, matrix elements are of size at most ceil(n/2)
      39     - 1, but we need two limbs extra. */
      40  void
      41  mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
      42  {
      43    mp_size_t s = (n+1)/2 + 1;
      44    M->alloc = s;
      45    M->n = 1;
      46    MPN_ZERO (p, 4 * s);
      47    M->p[0][0] = p;
      48    M->p[0][1] = p + s;
      49    M->p[1][0] = p + 2 * s;
      50    M->p[1][1] = p + 3 * s;
      51  
      52    M->p[0][0][0] = M->p[1][1][0] = 1;
      53  }
      54  
      55  /* Update column COL, adding in Q * column (1-COL). Temporary storage:
      56   * qn + n <= M->alloc, where n is the size of the largest element in
      57   * column 1 - COL. */
      58  void
      59  mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
      60  			  unsigned col, mp_ptr tp)
      61  {
      62    ASSERT (col < 2);
      63  
      64    if (qn == 1)
      65      {
      66        mp_limb_t q = qp[0];
      67        mp_limb_t c0, c1;
      68  
      69        c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
      70        c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
      71  
      72        M->p[0][col][M->n] = c0;
      73        M->p[1][col][M->n] = c1;
      74  
      75        M->n += (c0 | c1) != 0;
      76      }
      77    else
      78      {
      79        unsigned row;
      80  
      81        /* Carries for the unlikely case that we get both high words
      82  	 from the multiplication and carries from the addition. */
      83        mp_limb_t c[2];
      84        mp_size_t n;
      85  
      86        /* The matrix will not necessarily grow in size by qn, so we
      87  	 need normalization in order not to overflow M. */
      88  
      89        for (n = M->n; n + qn > M->n; n--)
      90  	{
      91  	  ASSERT (n > 0);
      92  	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
      93  	    break;
      94  	}
      95  
      96        ASSERT (qn + n <= M->alloc);
      97  
      98        for (row = 0; row < 2; row++)
      99  	{
     100  	  if (qn <= n)
     101  	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
     102  	  else
     103  	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);
     104  
     105  	  ASSERT (n + qn >= M->n);
     106  	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
     107  	}
     108  
     109        n += qn;
     110  
     111        if (c[0] | c[1])
     112  	{
     113  	  M->p[0][col][n] = c[0];
     114  	  M->p[1][col][n] = c[1];
     115  	  n++;
     116  	}
     117        else
     118  	{
     119  	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
     120  	  ASSERT (n >= M->n);
     121  	}
     122        M->n = n;
     123      }
     124  
     125    ASSERT (M->n < M->alloc);
     126  }
     127  
     128  /* Multiply M by M1 from the right. Since the M1 elements fit in
     129     GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
     130     temporary space M->n */
     131  void
     132  mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
     133  		       mp_ptr tp)
     134  {
     135    mp_size_t n0, n1;
     136  
     137    /* Could avoid copy by some swapping of pointers. */
     138    MPN_COPY (tp, M->p[0][0], M->n);
     139    n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
     140    MPN_COPY (tp, M->p[1][0], M->n);
     141    n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
     142  
     143    /* Depends on zero initialization */
     144    M->n = MAX(n0, n1);
     145    ASSERT (M->n < M->alloc);
     146  }
     147  
     148  /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
     149     of temporary storage (see mpn_matrix22_mul_itch). */
     150  void
     151  mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
     152  		     mp_ptr tp)
     153  {
     154    mp_size_t n;
     155  
     156    /* About the new size of M:s elements. Since M1's diagonal elements
     157       are > 0, no element can decrease. The new elements are of size
     158       M->n + M1->n, one limb more or less. The computation of the
     159       matrix product produces elements of size M->n + M1->n + 1. But
     160       the true size, after normalization, may be three limbs smaller.
     161  
     162       The reason that the product has normalized size >= M->n + M1->n -
     163       2 is subtle. It depends on the fact that M and M1 can be factored
     164       as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
     165       M ending with a large power and M1 starting with a large power of
     166       the same matrix. */
     167  
     168    /* FIXME: Strassen multiplication gives only a small speedup. In FFT
     169       multiplication range, this function could be sped up quite a lot
     170       using invariance. */
     171    ASSERT (M->n + M1->n < M->alloc);
     172  
     173    ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
     174  	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
     175  
     176    ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
     177  	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
     178  
     179    mpn_matrix22_mul (M->p[0][0], M->p[0][1],
     180  		    M->p[1][0], M->p[1][1], M->n,
     181  		    M1->p[0][0], M1->p[0][1],
     182  		    M1->p[1][0], M1->p[1][1], M1->n, tp);
     183  
     184    /* Index of last potentially non-zero limb, size is one greater. */
     185    n = M->n + M1->n;
     186  
     187    n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
     188    n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
     189    n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
     190  
     191    ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
     192  
     193    M->n = n + 1;
     194  }
     195  
     196  /* Multiplies the least significant p limbs of (a;b) by M^-1.
     197     Temporary space needed: 2 * (p + M->n)*/
     198  mp_size_t
     199  mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
     200  			mp_size_t n, mp_ptr ap, mp_ptr bp,
     201  			mp_size_t p, mp_ptr tp)
     202  {
     203    /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
     204       = (r11 a - r01 b; - r10 a + r00 b */
     205  
     206    mp_ptr t0 = tp;
     207    mp_ptr t1 = tp + p + M->n;
     208    mp_limb_t ah, bh;
     209    mp_limb_t cy;
     210  
     211    ASSERT (p + M->n  < n);
     212  
     213    /* First compute the two values depending on a, before overwriting a */
     214  
     215    if (M->n >= p)
     216      {
     217        mpn_mul (t0, M->p[1][1], M->n, ap, p);
     218        mpn_mul (t1, M->p[1][0], M->n, ap, p);
     219      }
     220    else
     221      {
     222        mpn_mul (t0, ap, p, M->p[1][1], M->n);
     223        mpn_mul (t1, ap, p, M->p[1][0], M->n);
     224      }
     225  
     226    /* Update a */
     227    MPN_COPY (ap, t0, p);
     228    ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
     229  
     230    if (M->n >= p)
     231      mpn_mul (t0, M->p[0][1], M->n, bp, p);
     232    else
     233      mpn_mul (t0, bp, p, M->p[0][1], M->n);
     234  
     235    cy = mpn_sub (ap, ap, n, t0, p + M->n);
     236    ASSERT (cy <= ah);
     237    ah -= cy;
     238  
     239    /* Update b */
     240    if (M->n >= p)
     241      mpn_mul (t0, M->p[0][0], M->n, bp, p);
     242    else
     243      mpn_mul (t0, bp, p, M->p[0][0], M->n);
     244  
     245    MPN_COPY (bp, t0, p);
     246    bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
     247    cy = mpn_sub (bp, bp, n, t1, p + M->n);
     248    ASSERT (cy <= bh);
     249    bh -= cy;
     250  
     251    if (ah > 0 || bh > 0)
     252      {
     253        ap[n] = ah;
     254        bp[n] = bh;
     255        n++;
     256      }
     257    else
     258      {
     259        /* The subtraction can reduce the size by at most one limb. */
     260        if (ap[n-1] == 0 && bp[n-1] == 0)
     261  	n--;
     262      }
     263    ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
     264    return n;
     265  }