1  /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
       2  
       3     Contributed to the GNU project by Martin Boij (as part of perfpow.c).
       4  
       5  Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.
       6  
       7  This file is part of the GNU MP Library.
       8  
       9  The GNU MP Library is free software; you can redistribute it and/or modify
      10  it under the terms of either:
      11  
      12    * the GNU Lesser General Public License as published by the Free
      13      Software Foundation; either version 3 of the License, or (at your
      14      option) any later version.
      15  
      16  or
      17  
      18    * the GNU General Public License as published by the Free Software
      19      Foundation; either version 2 of the License, or (at your option) any
      20      later version.
      21  
      22  or both in parallel, as here.
      23  
      24  The GNU MP Library is distributed in the hope that it will be useful, but
      25  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      26  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      27  for more details.
      28  
      29  You should have received copies of the GNU General Public License and the
      30  GNU Lesser General Public License along with the GNU MP Library.  If not,
      31  see https://www.gnu.org/licenses/.  */
      32  
      33  #include "gmp-impl.h"
      34  
      35  /* Computes a^2e (mod B). Uses right-to-left binary algorithm, since
      36     typical use will have e small. */
      37  static mp_limb_t
      38  powsquaredlimb (mp_limb_t a, mp_limb_t e)
      39  {
      40    mp_limb_t r;
      41  
      42    r = 1;
      43    /* if (LIKELY (e != 0)) */
      44    do {
      45      a *= a;
      46      if (e & 1)
      47        r *= a;
      48      e >>= 1;
      49    } while (e != 0);
      50  
      51    return r;
      52  }
      53  
      54  /* Compute r such that r^k * y = 1 (mod B^n).
      55  
      56     Iterates
      57       r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
      58     using Hensel lifting, each time doubling the number of known bits in r.
      59  
      60     Works just for odd k.  Else the Hensel lifting degenerates.
      61  
      62     FIXME:
      63  
      64       (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
      65  
      66       (2) Rewrite iteration as
      67  	   r' <-- r - k^{-1} r (r^k y - 1)
      68  	 and take advantage of the zero low part of r^k y - 1.
      69  
      70       (3) Use wrap-around trick.
      71  
      72       (4) Use a small table to get starting value.
      73  
      74     Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo
      75     Currently mpn_powlo requires 3*bn
      76     so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).
      77  */
      78  
      79  void
      80  mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
      81  {
      82    mp_ptr tp2, tp3;
      83    mp_limb_t kinv, k2, r0, y0;
      84    mp_size_t order[GMP_LIMB_BITS + 1];
      85    int d;
      86  
      87    ASSERT (bn > 0);
      88    ASSERT ((k & 1) != 0);
      89  
      90    tp2 = tp + bn;
      91    tp3 = tp + bn + ((bn + 3) >> 1);
      92    k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */
      93  
      94    binvert_limb (kinv, k);
      95  
      96    /* 4-bit initial approximation:
      97  
      98     y%16 | 1  3  5  7  9 11 13 15,
      99      k%4 +-------------------------+k2%2
     100       1  | 1 11 13  7  9  3  5 15  |  1
     101       3  | 1  3  5  7  9 11 13 15  |  0
     102  
     103    */
     104    y0 = yp[0];
     105  
     106    r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8);			/* 4 bits */
     107    r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f));	/* 8 bits */
     108    r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff));	/* 16 bits */
     109  #if GMP_NUMB_BITS > 16
     110    {
     111      unsigned prec = 16;
     112      do
     113        {
     114  	r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));
     115  	prec *= 2;
     116        }
     117      while (prec < GMP_NUMB_BITS);
     118    }
     119  #endif
     120  
     121    rp[0] = r0;
     122    if (bn == 1)
     123      return;
     124  
     125    d = 0;
     126    for (; bn != 2; bn = (bn + 1) >> 1)
     127      order[d++] = bn;
     128  
     129    order[d] = 2;
     130    bn = 1;
     131  
     132    do
     133      {
     134        mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */
     135        tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);
     136  
     137        bn = order[d];
     138  
     139        mpn_powlo (rp, tp, &k2, 1, bn, tp3);
     140        mpn_mullo_n (tp, yp, rp, bn);
     141  
     142        /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */
     143        /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/
     144        {
     145  	mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */
     146  	int borrow;
     147  	borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;
     148  	if (bn > pbn) /* 3 < bn */
     149  	  {
     150  	    if (borrow)
     151  	      mpn_com (tp + pbn, tp + pbn, bn - pbn);
     152  	    else
     153  	      mpn_neg (tp + pbn, tp + pbn, bn - pbn);
     154  	  }
     155        }
     156        mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);
     157      }
     158    while (--d >= 0);
     159  }