1  /* mulmod_bnm1.c -- multiplication mod B^n-1.
       2  
       3     Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
       4     Marco Bodrato.
       5  
       6     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       7     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       8     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       9  
      10  Copyright 2009, 2010, 2012, 2013, 2020, 2022 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  
      39  #include "gmp-impl.h"
      40  #include "longlong.h"
      41  
      42  /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
      43     mod B^rn - 1, and values are semi-normalised; zero is represented
      44     as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
      45     tp==rp is allowed. */
      46  void
      47  mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
      48  		    mp_ptr tp)
      49  {
      50    mp_limb_t cy;
      51  
      52    ASSERT (0 < rn);
      53  
      54    mpn_mul_n (tp, ap, bp, rn);
      55    cy = mpn_add_n (rp, tp, tp + rn, rn);
      56    /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
      57     * be no overflow when adding in the carry. */
      58    MPN_INCR_U (rp, rn, cy);
      59  }
      60  
      61  
      62  /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
      63     normalised representation, computation is mod B^rn + 1. Needs
      64     a scratch area of 2rn limbs at tp; tp == rp is allowed.
      65     Output is normalised. */
      66  static void
      67  mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
      68  		    mp_ptr tp)
      69  {
      70    mp_limb_t cy;
      71    unsigned k;
      72  
      73    ASSERT (0 < rn);
      74  
      75    if (UNLIKELY (ap[rn] | bp [rn]))
      76      {
      77        if (ap[rn])
      78  	cy = bp [rn] + mpn_neg (rp, bp, rn);
      79        else /* ap[rn] == 0 */
      80  	cy = mpn_neg (rp, ap, rn);
      81      }
      82    else if (MPN_MULMOD_BKNP1_USABLE (rn, k, MUL_FFT_MODF_THRESHOLD))
      83      {
      84        mp_size_t n_k = rn / k;
      85        TMP_DECL;
      86  
      87        TMP_MARK;
      88        mpn_mulmod_bknp1 (rp, ap, bp, n_k, k,
      89                         TMP_ALLOC_LIMBS (mpn_mulmod_bknp1_itch (rn)));
      90        TMP_FREE;
      91        return;
      92      }
      93    else
      94      {
      95        mpn_mul_n (tp, ap, bp, rn);
      96        cy = mpn_sub_n (rp, tp, tp + rn, rn);
      97      }
      98    rp[rn] = 0;
      99    MPN_INCR_U (rp, rn + 1, cy);
     100  }
     101  
     102  
     103  /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
     104   *
     105   * The result is expected to be ZERO if and only if one of the operand
     106   * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
     107   * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
     108   * combine results and obtain a natural number when one knows in
     109   * advance that the final value is less than (B^rn-1).
     110   * Moreover it should not be a problem if mulmod_bnm1 is used to
     111   * compute the full product with an+bn <= rn, because this condition
     112   * implies (B^an-1)(B^bn-1) < (B^rn-1) .
     113   *
     114   * Requires 0 < bn <= an <= rn and an + bn > rn/2
     115   * Scratch need: rn + (need for recursive call OR rn + 4). This gives
     116   *
     117   * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
     118   */
     119  void
     120  mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
     121  {
     122    ASSERT (0 < bn);
     123    ASSERT (bn <= an);
     124    ASSERT (an <= rn);
     125  
     126    if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
     127      {
     128        if (UNLIKELY (bn < rn))
     129  	{
     130  	  if (UNLIKELY (an + bn <= rn))
     131  	    {
     132  	      mpn_mul (rp, ap, an, bp, bn);
     133  	    }
     134  	  else
     135  	    {
     136  	      mp_limb_t cy;
     137  	      mpn_mul (tp, ap, an, bp, bn);
     138  	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
     139  	      MPN_INCR_U (rp, rn, cy);
     140  	    }
     141  	}
     142        else
     143  	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
     144      }
     145    else
     146      {
     147        mp_size_t n;
     148        mp_limb_t cy;
     149        mp_limb_t hi;
     150  
     151        n = rn >> 1;
     152  
     153        /* We need at least an + bn >= n, to be able to fit one of the
     154  	 recursive products at rp. Requiring strict inequality makes
     155  	 the code slightly simpler. If desired, we could avoid this
     156  	 restriction by initially halving rn as long as rn is even and
     157  	 an + bn <= rn/2. */
     158  
     159        ASSERT (an + bn > n);
     160  
     161        /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
     162  	 and crt together as
     163  
     164  	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
     165        */
     166  
     167  #define a0 ap
     168  #define a1 (ap + n)
     169  #define b0 bp
     170  #define b1 (bp + n)
     171  
     172  #define xp  tp	/* 2n + 2 */
     173        /* am1  maybe in {xp, n} */
     174        /* bm1  maybe in {xp + n, n} */
     175  #define sp1 (tp + 2*n + 2)
     176        /* ap1  maybe in {sp1, n + 1} */
     177        /* bp1  maybe in {sp1 + n + 1, n + 1} */
     178  
     179        {
     180  	mp_srcptr am1, bm1;
     181  	mp_size_t anm, bnm;
     182  	mp_ptr so;
     183  
     184  	bm1 = b0;
     185  	bnm = bn;
     186  	if (LIKELY (an > n))
     187  	  {
     188  	    am1 = xp;
     189  	    cy = mpn_add (xp, a0, n, a1, an - n);
     190  	    MPN_INCR_U (xp, n, cy);
     191  	    anm = n;
     192  	    so = xp + n;
     193  	    if (LIKELY (bn > n))
     194  	      {
     195  		bm1 = so;
     196  		cy = mpn_add (so, b0, n, b1, bn - n);
     197  		MPN_INCR_U (so, n, cy);
     198  		bnm = n;
     199  		so += n;
     200  	      }
     201  	  }
     202  	else
     203  	  {
     204  	    so = xp;
     205  	    am1 = a0;
     206  	    anm = an;
     207  	  }
     208  
     209  	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
     210        }
     211  
     212        {
     213  	int       k;
     214  	mp_srcptr ap1, bp1;
     215  	mp_size_t anp, bnp;
     216  
     217  	bp1 = b0;
     218  	bnp = bn;
     219  	if (LIKELY (an > n)) {
     220  	  ap1 = sp1;
     221  	  cy = mpn_sub (sp1, a0, n, a1, an - n);
     222  	  sp1[n] = 0;
     223  	  MPN_INCR_U (sp1, n + 1, cy);
     224  	  anp = n + ap1[n];
     225  	  if (LIKELY (bn > n)) {
     226  	    bp1 = sp1 + n + 1;
     227  	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
     228  	    sp1[2*n+1] = 0;
     229  	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
     230  	    bnp = n + bp1[n];
     231  	  }
     232  	} else {
     233  	  ap1 = a0;
     234  	  anp = an;
     235  	}
     236  
     237  	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
     238  	  k=0;
     239  	else
     240  	  {
     241  	    int mask;
     242  	    k = mpn_fft_best_k (n, 0);
     243  	    mask = (1<<k) - 1;
     244  	    while (n & mask) {k--; mask >>=1;};
     245  	  }
     246  	if (k >= FFT_FIRST_K)
     247  	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
     248  	else if (UNLIKELY (bp1 == b0))
     249  	  {
     250  	    ASSERT (anp + bnp <= 2*n+1);
     251  	    ASSERT (anp + bnp > n);
     252  	    ASSERT (anp >= bnp);
     253  	    mpn_mul (xp, ap1, anp, bp1, bnp);
     254  	    anp = anp + bnp - n;
     255  	    ASSERT (anp <= n || xp[2*n]==0);
     256  	    anp-= anp > n;
     257  	    cy = mpn_sub (xp, xp, n, xp + n, anp);
     258  	    xp[n] = 0;
     259  	    MPN_INCR_U (xp, n+1, cy);
     260  	  }
     261  	else
     262  	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
     263        }
     264  
     265        /* Here the CRT recomposition begins.
     266  
     267  	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
     268  	 Division by 2 is a bitwise rotation.
     269  
     270  	 Assumes xp normalised mod (B^n+1).
     271  
     272  	 The residue class [0] is represented by [B^n-1]; except when
     273  	 both input are ZERO.
     274        */
     275  
     276  #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
     277  #if HAVE_NATIVE_mpn_rsh1add_nc
     278        cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
     279        hi = cy << (GMP_NUMB_BITS - 1);
     280        cy = 0;
     281        /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
     282  	 overflows, i.e. a further increment will not overflow again. */
     283  #else /* ! _nc */
     284        cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
     285        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
     286        cy >>= 1;
     287        /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
     288  	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
     289  #endif
     290  #if GMP_NAIL_BITS == 0
     291        add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
     292  #else
     293        cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
     294        rp[n-1] ^= hi;
     295  #endif
     296  #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
     297  #if HAVE_NATIVE_mpn_add_nc
     298        cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
     299  #else /* ! _nc */
     300        cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
     301  #endif
     302        cy += (rp[0]&1);
     303        mpn_rshift(rp, rp, n, 1);
     304        ASSERT (cy <= 2);
     305        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
     306        cy >>= 1;
     307        /* We can have cy != 0 only if hi = 0... */
     308        ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
     309        rp[n-1] |= hi;
     310        /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
     311  #endif
     312        ASSERT (cy <= 1);
     313        /* Next increment can not overflow, read the previous comments about cy. */
     314        ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
     315        MPN_INCR_U(rp, n, cy);
     316  
     317        /* Compute the highest half:
     318  	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
     319         */
     320        if (UNLIKELY (an + bn < rn))
     321  	{
     322  	  /* Note that in this case, the only way the result can equal
     323  	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
     324  	     then the output of both the recursive calls and this CRT
     325  	     reconstruction is zero, not B^{rn} - 1. Which is good,
     326  	     since the latter representation doesn't fit in the output
     327  	     area.*/
     328  	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
     329  
     330  	  /* FIXME: This subtraction of the high parts is not really
     331  	     necessary, we do it to get the carry out, and for sanity
     332  	     checking. */
     333  	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
     334  				   xp + an + bn - n, rn - (an + bn), cy);
     335  	  ASSERT (an + bn == rn - 1 ||
     336  		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
     337  	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
     338  	  ASSERT (cy == (xp + an + bn - n)[0]);
     339  	}
     340        else
     341  	{
     342  	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
     343  	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
     344  	     DECR will affect _at most_ the lowest n limbs. */
     345  	  MPN_DECR_U (rp, 2*n, cy);
     346  	}
     347  #undef a0
     348  #undef a1
     349  #undef b0
     350  #undef b1
     351  #undef xp
     352  #undef sp1
     353      }
     354  }
     355  
     356  mp_size_t
     357  mpn_mulmod_bnm1_next_size (mp_size_t n)
     358  {
     359    mp_size_t nh;
     360  
     361    if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
     362      return n;
     363    if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
     364      return (n + (2-1)) & (-2);
     365    if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
     366      return (n + (4-1)) & (-4);
     367  
     368    nh = (n + 1) >> 1;
     369  
     370    if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
     371      return (n + (8-1)) & (-8);
     372  
     373    return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
     374  }