(root)/
gmp-6.3.0/
mpn/
generic/
gcdext_lehmer.c
       1  /* mpn_gcdext -- Extended Greatest Common Divisor.
       2  
       3  Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
       4  Inc.
       5  
       6  This file is part of the GNU MP Library.
       7  
       8  The GNU MP Library is free software; you can redistribute it and/or modify
       9  it under the terms of either:
      10  
      11    * the GNU Lesser General Public License as published by the Free
      12      Software Foundation; either version 3 of the License, or (at your
      13      option) any later version.
      14  
      15  or
      16  
      17    * the GNU General Public License as published by the Free Software
      18      Foundation; either version 2 of the License, or (at your option) any
      19      later version.
      20  
      21  or both in parallel, as here.
      22  
      23  The GNU MP Library is distributed in the hope that it will be useful, but
      24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      26  for more details.
      27  
      28  You should have received copies of the GNU General Public License and the
      29  GNU Lesser General Public License along with the GNU MP Library.  If not,
      30  see https://www.gnu.org/licenses/.  */
      31  
      32  #include "gmp-impl.h"
      33  #include "longlong.h"
      34  
/* Callback passed to mpn_gcd_subdiv_step by mpn_gcdext_lehmer_n below.
   p points to a struct gcdext_ctx holding the cofactors u0 and u1 (un
   limbs each) and the output areas gp/up/usize.

   Two modes, distinguished by gp:

   * gp != NULL: the gcd {gp, gn} has been found; copy it and the
     appropriate cofactor to the context's output areas.

   * gp == NULL: a division step produced quotient {qp, qn}; update
     the cofactors accordingly.

   Here, d is the index of the cofactor to update; at termination a
   negative d means "pick the smaller of +u1 and -u0".  FIXME: Could
   use qn = 0 for the common case q = 1. */
void
mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
		 mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
  mp_size_t un = ctx->un;

  if (gp)
    {
      /* Termination: store the gcd and the selected cofactor. */
      mp_srcptr up;

      ASSERT (gn > 0);
      ASSERT (gp[gn-1] > 0);

      MPN_COPY (ctx->gp, gp, gn);
      ctx->gn = gn;

      if (d < 0)
	{
	  int c;

	  /* Must return the smallest cofactor, +u1 or -u0 */
	  MPN_CMP (c, ctx->u0, ctx->u1, un);
	  /* Equal cofactors can happen only in the trivial
	     u0 == u1 == 1 single-limb case. */
	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));

	  d = c < 0;
	}

      up = d ? ctx->u0 : ctx->u1;

      MPN_NORMALIZE (up, un);
      MPN_COPY (ctx->up, up, un);

      /* A negative size encodes the negated cofactor -u0. */
      *ctx->usize = d ? -un : un;
    }
  else
    {
      /* Division step: add q times one cofactor to the other. */
      mp_limb_t cy;
      mp_ptr u0 = ctx->u0;
      mp_ptr u1 = ctx->u1;

      ASSERT (d >= 0);

      if (d)
	MP_PTR_SWAP (u0, u1);

      /* Strip a possible high zero limb of the quotient. */
      qn -= (qp[qn-1] == 0);

      /* Update u0 += q  * u1 */
      if (qn == 1)
	{
	  mp_limb_t q = qp[0];

	  if (q == 1)
	    /* A common case. */
	    cy = mpn_add_n (u0, u0, u1, un);
	  else
	    cy = mpn_addmul_1 (u0, u1, un, q);
	}
      else
	{
	  mp_size_t u1n;
	  mp_ptr tp;

	  u1n = un;
	  MPN_NORMALIZE (u1, u1n);

	  /* If u1 is zero, u0 is unchanged and there is no carry to
	     record; the sizes in the context stay valid. */
	  if (u1n == 0)
	    return;

	  /* Should always have u1n == un here, and u1 >= u0. The
	     reason is that we alternate adding u0 to u1 and u1 to u0
	     (corresponding to subtractions a - b and b - a), and we
	     can get a large quotient only just after a switch, which
	     means that we'll add (a multiple of) the larger u to the
	     smaller. */

	  tp = ctx->tp;

	  /* mpn_mul wants its larger operand first; order the call
	     accordingly. */
	  if (qn > u1n)
	    mpn_mul (tp, qp, qn, u1, u1n);
	  else
	    mpn_mul (tp, u1, u1n, qp, qn);

	  /* Normalize the product q * u1, which has at most qn + u1n
	     limbs and at most one high zero limb. */
	  u1n += qn;
	  u1n -= tp[u1n-1] == 0;

	  if (u1n >= un)
	    {
	      cy = mpn_add (u0, tp, u1n, u0, un);
	      un = u1n;
	    }
	  else
	    /* Note: Unlikely case, maybe never happens? */
	    cy = mpn_add (u0, u0, un, tp, u1n);

	}
      /* Record a possible carry limb; ualloc = n+1 in the caller
	 leaves room for it. */
      u0[un] = cy;
      ctx->un = un + (cy > 0);
    }
}
     138  
/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
   need at most n for the quotient and n+1 for the u update (reusing
   the extra u). In all, 4n + 3. */

/* Lehmer-style extended gcd of {ap, n} and {bp, n}.  Stores the gcd
   at gp and the cofactor at up, with its (signed) size in *usize, and
   returns the limb size of the gcd.  The areas at ap and bp are used
   as workspace and destroyed; tp is scratch (4n + 3 limbs, per the
   note above). */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
		     mp_ptr tp)
{
  mp_size_t ualloc = n + 1;

  /* Keeps track of the second row of the reduction matrix
   *
   *   M = (v0, v1 ; u0, u1)
   *
   * which correspond to the first column of the inverse
   *
   *   M^{-1} = (u1, -v1; -u0, v0)
   *
   * This implies that
   *
   *   a =  u1 A (mod B)
   *   b = -u0 A (mod B)
   *
   * where A, B denotes the input values.
   */

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;
  mp_ptr u2;

  /* Carve three (n+1)-limb cofactor areas out of tp; u2 doubles as
     scratch for the hook's q * u1 product. */
  MPN_ZERO (tp, 3*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;
  u2 = tp; tp += ualloc;

  /* Start from the identity: u0 = 0, u1 = 1. */
  u1[0] = 1; un = 1;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usize;

  /* FIXME: Handle n == 2 differently, after the loop? */
  while (n >= 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t ah, al, bh, bl;
      mp_limb_t mask;

      /* At least one of a, b has a non-zero high limb. */
      mask = ap[n-1] | bp[n-1];
      ASSERT (mask > 0);

      /* Extract the two most significant limbs of a and b, shifted
	 left so the top bit of the larger one is set, as required by
	 mpn_hgcd2.  Three cases depending on normalization and n. */
      if (mask & GMP_NUMB_HIGHBIT)
	{
	  ah = ap[n-1]; al = ap[n-2];
	  bh = bp[n-1]; bl = bp[n-2];
	}
      else if (n == 2)
	{
	  /* We use the full inputs without truncation, so we can
	     safely shift left. */
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
	  al = ap[0] << shift;
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
	  bl = bp[0] << shift;
	}
      else
	{
	  int shift;

	  count_leading_zeros (shift, mask);
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
	}

      /* Try an mpn_nhgcd2 step */
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
	{
	  /* Success: reduce a, b by M^{-1} and update the cofactors
	     by M, using tp and u2 as the respective scratch areas. */
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
	  MP_PTR_SWAP (ap, tp);
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
	  MP_PTR_SWAP (u0, u2);
	}
      else
	{
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = u2;
	  ctx.un = un;

	  /* Temporary storage n for the quotient and ualloc for the
	     new cofactor. */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  /* n == 0 means the hook already stored the gcd and
	     cofactor; ctx.gn is the gcd's size. */
	  if (n == 0)
	    return ctx.gn;

	  un = ctx.un;
	}
    }
  /* Both operands are now single non-zero limbs. */
  ASSERT_ALWAYS (ap[0] > 0);
  ASSERT_ALWAYS (bp[0] > 0);

  if (ap[0] == bp[0])
    {
      int c;

      /* Which cofactor to return now? Candidates are +u1 and -u0,
	 depending on which of a and b was most recently reduced,
	 which we don't keep track of. So compare and get the smallest
	 one. */

      gp[0] = ap[0];

      MPN_CMP (c, u0, u1, un);
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  /* Negative size encodes the negated cofactor -u0. */
	  *usize = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	}
      return 1;
    }
  else
    {
      mp_limb_t uh, vh;
      mp_limb_signed_t u;
      mp_limb_signed_t v;
      int negate;

      /* Finish with a single-limb extended gcd: gcd = u*a + v*b. */
      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);

      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
	 two limbs. */

      if (u == 0)
	{
	  ASSERT (v == 1);
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usize = -un;
	  return 1;
	}
      else if (v == 0)
	{
	  ASSERT (u == 1);
	  MPN_NORMALIZE (u1, un);
	  MPN_COPY (up, u1, un);
	  *usize = un;
	  return 1;
	}
      else if (u > 0)
	{
	  negate = 0;
	  ASSERT (v < 0);
	  v = -v;
	}
      else
	{
	  negate = 1;
	  ASSERT (v > 0);
	  u = -u;
	}

      /* After the sign adjustment both u and v are positive, so
	 u*u1 - v*u0 (up to overall sign, tracked by negate) becomes
	 the sum u*u1 + v*u0, computed with a mul_1 + addmul_1. */
      uh = mpn_mul_1 (up, u1, un, u);
      vh = mpn_addmul_1 (up, u0, un, v);

      if ( (uh | vh) > 0)
	{
	  /* Combine the two carry limbs; if their sum itself carries,
	     the result grows by a second limb equal to 1. */
	  uh += vh;
	  up[un++] = uh;
	  if (uh < vh)
	    up[un++] = 1;
	}

      MPN_NORMALIZE_NOT_ZERO (up, un);

      *usize = negate ? -un : un;
      return 1;
    }
}