1  /* sqrmod_bnm1.c -- squaring mod B^n-1.
       2  
       3     Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
       4     Marco Bodrato.
       5  
       6     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       7     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       8     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       9  
      10  Copyright 2009, 2010, 2012, 2020, 2022 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  
      39  #include "gmp-impl.h"
      40  #include "longlong.h"
      41  
      42  /* Input is {ap,rn}; output is {rp,rn}, computation is
      43     mod B^rn - 1, and values are semi-normalised; zero is represented
      44     as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
      45     tp==rp is allowed. */
      46  static void
      47  mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
      48  {
      49    mp_limb_t cy;
      50  
      51    ASSERT (0 < rn);
      52  
      53    mpn_sqr (tp, ap, rn);
      54    cy = mpn_add_n (rp, tp, tp + rn, rn);
      55    /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
      56     * be no overflow when adding in the carry. */
      57    MPN_INCR_U (rp, rn, cy);
      58  }
      59  
      60  
      61  /* Input is {ap,rn+1}; output is {rp,rn+1}, in
      62     normalised representation, computation is mod B^rn + 1. Needs
      63     a scratch area of 2rn limbs at tp; tp == rp is allowed.
      64     Output is normalised. */
      65  static void
      66  mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
      67  {
      68    mp_limb_t cy;
      69    unsigned k;
      70  
      71    ASSERT (0 < rn);
      72  
      73    if (UNLIKELY (ap[rn]))
      74      {
      75        *rp = 1;
      76        MPN_FILL (rp + 1, rn, 0);
      77        return;
      78      }
      79    else if (MPN_SQRMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
      80      {
      81        mp_size_t n_k = rn / k;
      82        TMP_DECL;
      83  
      84        TMP_MARK;
      85        mpn_sqrmod_bknp1 (rp, ap, n_k, k,
      86  			TMP_ALLOC_LIMBS (mpn_sqrmod_bknp1_itch (rn)));
      87        TMP_FREE;
      88        return;
      89      }
      90    mpn_sqr (tp, ap, rn);
      91    cy = mpn_sub_n (rp, tp, tp + rn, rn);
      92    rp[rn] = 0;
      93    MPN_INCR_U (rp, rn + 1, cy);
      94  }
      95  
      96  
      97  /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
      98   *
      99   * The result is expected to be ZERO if and only if the operand
     100   * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
     101   * B^rn-1.
     102   * It should not be a problem if sqrmod_bnm1 is used to
     103   * compute the full square with an <= 2*rn, because this condition
     104   * implies (B^an-1)^2 < (B^rn-1) .
     105   *
     106   * Requires rn/4 < an <= rn
     107   * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
     108   *
     109   * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
     110   */
     111  void
     112  mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
     113  {
     114    ASSERT (0 < an);
     115    ASSERT (an <= rn);
     116  
     117    if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
     118      {
     119        if (UNLIKELY (an < rn))
     120  	{
     121  	  if (UNLIKELY (2*an <= rn))
     122  	    {
     123  	      mpn_sqr (rp, ap, an);
     124  	    }
     125  	  else
     126  	    {
     127  	      mp_limb_t cy;
     128  	      mpn_sqr (tp, ap, an);
     129  	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
     130  	      MPN_INCR_U (rp, rn, cy);
     131  	    }
     132  	}
     133        else
     134  	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
     135      }
     136    else
     137      {
     138        mp_size_t n;
     139        mp_limb_t cy;
     140        mp_limb_t hi;
     141  
     142        n = rn >> 1;
     143  
     144        ASSERT (2*an > n);
     145  
     146        /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
     147  	 and crt together as
     148  
     149  	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
     150        */
     151  
     152  #define a0 ap
     153  #define a1 (ap + n)
     154  
     155  #define xp  tp	/* 2n + 2 */
     156        /* am1  maybe in {xp, n} */
     157  #define sp1 (tp + 2*n + 2)
     158        /* ap1  maybe in {sp1, n + 1} */
     159  
     160        {
     161  	mp_srcptr am1;
     162  	mp_size_t anm;
     163  	mp_ptr so;
     164  
     165  	if (LIKELY (an > n))
     166  	  {
     167  	    so = xp + n;
     168  	    am1 = xp;
     169  	    cy = mpn_add (xp, a0, n, a1, an - n);
     170  	    MPN_INCR_U (xp, n, cy);
     171  	    anm = n;
     172  	  }
     173  	else
     174  	  {
     175  	    so = xp;
     176  	    am1 = a0;
     177  	    anm = an;
     178  	  }
     179  
     180  	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
     181        }
     182  
     183        {
     184  	int       k;
     185  	mp_srcptr ap1;
     186  	mp_size_t anp;
     187  
     188  	if (LIKELY (an > n)) {
     189  	  ap1 = sp1;
     190  	  cy = mpn_sub (sp1, a0, n, a1, an - n);
     191  	  sp1[n] = 0;
     192  	  MPN_INCR_U (sp1, n + 1, cy);
     193  	  anp = n + ap1[n];
     194  	} else {
     195  	  ap1 = a0;
     196  	  anp = an;
     197  	}
     198  
     199  	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
     200  	  k=0;
     201  	else
     202  	  {
     203  	    int mask;
     204  	    k = mpn_fft_best_k (n, 1);
     205  	    mask = (1<<k) -1;
     206  	    while (n & mask) {k--; mask >>=1;};
     207  	  }
     208  	if (k >= FFT_FIRST_K)
     209  	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
     210  	else if (UNLIKELY (ap1 == a0))
     211  	  {
     212  	    ASSERT (anp <= n);
     213  	    ASSERT (2*anp > n);
     214  	    mpn_sqr (xp, a0, an);
     215  	    anp = 2*an - n;
     216  	    cy = mpn_sub (xp, xp, n, xp + n, anp);
     217  	    xp[n] = 0;
     218  	    MPN_INCR_U (xp, n+1, cy);
     219  	  }
     220  	else
     221  	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
     222        }
     223  
     224        /* Here the CRT recomposition begins.
     225  
     226  	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
     227  	 Division by 2 is a bitwise rotation.
     228  
     229  	 Assumes xp normalised mod (B^n+1).
     230  
     231  	 The residue class [0] is represented by [B^n-1]; except when
     232  	 both input are ZERO.
     233        */
     234  
     235  #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
     236  #if HAVE_NATIVE_mpn_rsh1add_nc
     237        cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
     238        hi = cy << (GMP_NUMB_BITS - 1);
     239        cy = 0;
     240        /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
     241  	 overflows, i.e. a further increment will not overflow again. */
     242  #else /* ! _nc */
     243        cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
     244        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
     245        cy >>= 1;
     246        /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
     247  	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
     248  #endif
     249  #if GMP_NAIL_BITS == 0
     250        add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
     251  #else
     252        cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
     253        rp[n-1] ^= hi;
     254  #endif
     255  #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
     256  #if HAVE_NATIVE_mpn_add_nc
     257        cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
     258  #else /* ! _nc */
     259        cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
     260  #endif
     261        cy += (rp[0]&1);
     262        mpn_rshift(rp, rp, n, 1);
     263        ASSERT (cy <= 2);
     264        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
     265        cy >>= 1;
     266        /* We can have cy != 0 only if hi = 0... */
     267        ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
     268        rp[n-1] |= hi;
     269        /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
     270  #endif
     271        ASSERT (cy <= 1);
     272        /* Next increment can not overflow, read the previous comments about cy. */
     273        ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
     274        MPN_INCR_U(rp, n, cy);
     275  
     276        /* Compute the highest half:
     277  	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
     278         */
     279        if (UNLIKELY (2*an < rn))
     280  	{
     281  	  /* Note that in this case, the only way the result can equal
     282  	     zero mod B^{rn} - 1 is if the input is zero, and
     283  	     then the output of both the recursive calls and this CRT
     284  	     reconstruction is zero, not B^{rn} - 1. */
     285  	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
     286  
     287  	  /* FIXME: This subtraction of the high parts is not really
     288  	     necessary, we do it to get the carry out, and for sanity
     289  	     checking. */
     290  	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
     291  				   xp + 2*an - n, rn - 2*an, cy);
     292  	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
     293  	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
     294  	  ASSERT (cy == (xp + 2*an - n)[0]);
     295  	}
     296        else
     297  	{
     298  	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
     299  	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
     300  	     DECR will affect _at most_ the lowest n limbs. */
     301  	  MPN_DECR_U (rp, 2*n, cy);
     302  	}
     303  #undef a0
     304  #undef a1
     305  #undef xp
     306  #undef sp1
     307      }
     308  }
     309  
     310  mp_size_t
     311  mpn_sqrmod_bnm1_next_size (mp_size_t n)
     312  {
     313    mp_size_t nh;
     314  
     315    if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
     316      return n;
     317    if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
     318      return (n + (2-1)) & (-2);
     319    if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
     320      return (n + (4-1)) & (-4);
     321  
     322    nh = (n + 1) >> 1;
     323  
     324    if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
     325      return (n + (8-1)) & (-8);
     326  
     327    return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
     328  }