(root)/
gmp-6.3.0/
mpn/
generic/
mulmod_bknp1.c
       1  /* Mulptiplication mod B^n+1, for small operands.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
       6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2020-2022 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  #include "gmp-impl.h"
      38  
      39  #ifndef MOD_BKNP1_USE11
      40  #define MOD_BKNP1_USE11 ((GMP_NUMB_BITS % 8 != 0) && (GMP_NUMB_BITS % 2 == 0))
      41  #endif
      42  #ifndef MOD_BKNP1_ONLY3
      43  #define MOD_BKNP1_ONLY3 0
      44  #endif
      45  
      46  /* {rp, (k - 1) * n} = {op, k * n + 1} % (B^{k*n}+1) / (B^n+1) */
      47  static void
      48  _mpn_modbknp1dbnp1_n (mp_ptr rp, mp_srcptr op, mp_size_t n, unsigned k)
      49  {
      50    mp_limb_t hl;
      51    mp_srcptr hp;
      52    unsigned i;
      53  
      54  #if MOD_BKNP1_ONLY3
      55    ASSERT (k == 3);
      56    k = 3;
      57  #endif
      58    ASSERT (k > 2);
      59    ASSERT (k % 2 == 1);
      60  
      61    --k;
      62  
      63    rp += k * n;
      64    op += k * n;
      65    hp = op;
      66    hl = hp[n]; /* initial op[k*n]. */
      67    ASSERT (hl < GMP_NUMB_MAX - 1);
      68  
      69  #if MOD_BKNP1_ONLY3 == 0
      70    /* The first MPN_INCR_U (rp + n, 1, cy); in the loop should be
      71       rp[n] = cy;						*/
      72    *rp = 0;
      73  #endif
      74  
      75    i = k >> 1;
      76    do
      77     {
      78       mp_limb_t cy, bw;
      79       rp -= n;
      80       op -= n;
      81       cy = hl + mpn_add_n (rp, op, hp, n);
      82  #if MOD_BKNP1_ONLY3
      83       rp[n] = cy;
      84  #else
      85       MPN_INCR_U (rp + n, (k - i * 2) * n + 1, cy);
      86  #endif
      87       rp -= n;
      88       op -= n;
      89       bw = hl + mpn_sub_n (rp, op, hp, n);
      90       MPN_DECR_U (rp + n, (k - i * 2 + 1) * n + 1, bw);
      91     }
      92    while (--i != 0);
      93  
      94    for (; (hl = *(rp += k * n)) != 0; ) /* Should run only once... */
      95      {
      96        *rp = 0;
      97        i = k >> 1;
      98        do
      99  	{
     100  	  rp -= n;
     101  	  MPN_INCR_U (rp, (k - i * 2 + 1) * n + 1, hl);
     102  	  rp -= n;
     103  	  MPN_DECR_U (rp, (k - i * 2 + 2) * n + 1, hl);
     104  	}
     105        while (--i != 0);
     106      }
     107  }
     108  
     109  static void
     110  _mpn_modbnp1_pn_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
     111  {
     112    ASSERT (r[n] == h);
     113  
     114    /* Fully normalise */
     115    MPN_DECR_U (r, n + 1, h);
     116    h -= r[n];
     117    r[n] = 0;
     118    MPN_INCR_U (r, n + 1, h);
     119  }
     120  
     121  static void
     122  _mpn_modbnp1_neg_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
     123  {
     124    r[n] = 0;
     125    MPN_INCR_U (r, n + 1, -h);
     126    if (UNLIKELY (r[n] != 0))
     127      _mpn_modbnp1_pn_ip (r, n, 1);
     128  }
     129  
     130  static void
     131  _mpn_modbnp1_nc_ip (mp_ptr r, mp_size_t n, mp_limb_t h)
     132  {
     133    if (h & GMP_NUMB_HIGHBIT) /* This means h < 0 */
     134      {
     135        _mpn_modbnp1_neg_ip (r, n, h);
     136      }
     137    else
     138      {
     139        r[n] = h;
     140        if (h)
     141  	_mpn_modbnp1_pn_ip(r, n, h);
     142      }
     143  }
     144  
     145  /* {rp, rn + 1} = {op, on} mod (B^{rn}+1) */
     146  /* Used when rn < on < 2*rn. */
     147  static void
     148  _mpn_modbnp1 (mp_ptr rp, mp_size_t rn, mp_srcptr op, mp_size_t on)
     149  {
     150    mp_limb_t bw;
     151  
     152  #if 0
     153    if (UNLIKELY (on <= rn))
     154      {
     155        MPN_COPY (rp, op, on);
     156        MPN_ZERO (rp + on, rn - on);
     157        return;
     158      }
     159  #endif
     160  
     161    ASSERT (on > rn);
     162    ASSERT (on <= 2 * rn);
     163  
     164    bw = mpn_sub (rp, op, rn, op + rn, on - rn);
     165    rp[rn] = 0;
     166    MPN_INCR_U (rp, rn + 1, bw);
     167  }
     168  
     169  /* {rp, rn + 1} = {op, k * rn + 1} % (B^{rn}+1) */
     170  /* With odd k >= 3. */
     171  static void
     172  _mpn_modbnp1_kn (mp_ptr rp, mp_srcptr op, mp_size_t rn, unsigned k)
     173  {
     174    mp_limb_t cy;
     175  
     176  #if MOD_BKNP1_ONLY3
     177    ASSERT (k == 3);
     178    k = 3;
     179  #endif
     180    ASSERT (k & 1);
     181    k >>= 1;
     182    ASSERT (0 < k && k < GMP_NUMB_HIGHBIT - 3);
     183    ASSERT (op[(1 + 2 * k) * rn] < GMP_NUMB_HIGHBIT - 2 - k);
     184  
     185    cy = - mpn_sub_n (rp, op, op + rn, rn);
     186    for (;;) {
     187      op += 2 * rn;
     188      cy += mpn_add_n (rp, rp, op, rn);
     189      if (--k == 0)
     190        break;
     191      cy -= mpn_sub_n (rp, rp, op + rn, rn);
     192    };
     193  
     194    cy += op[rn];
     195    _mpn_modbnp1_nc_ip (rp, rn, cy);
     196  }
     197  
     198  /* For the various mpn_divexact_byN here, fall back to using either
     199     mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
     200     faster if it is native.  For now, since mpn_divexact_1 is native on
     201     platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
     202     mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
     203  
     204  #ifndef mpn_divexact_by5
     205  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
     206  #define BINVERT_5 \
     207    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 5 * 3 << 3) + 5) & GMP_NUMB_MAX)
     208  #define mpn_divexact_by5(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,5,BINVERT_5,0)
     209  #else
     210  #define mpn_divexact_by5(dst,src,size) mpn_divexact_1(dst,src,size,5)
     211  #endif
     212  #endif
     213  
     214  #ifndef mpn_divexact_by7
     215  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
     216  #define BINVERT_7 \
     217    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 3)) / 7 * 3 << 4) + 7) & GMP_NUMB_MAX)
     218  #define mpn_divexact_by7(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,7,BINVERT_7,0)
     219  #else
     220  #define mpn_divexact_by7(dst,src,size) mpn_divexact_1(dst,src,size,7)
     221  #endif
     222  #endif
     223  
     224  #ifndef mpn_divexact_by11
     225  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
     226  #define BINVERT_11 \
     227    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 10)) / 11 << 5) + 3) & GMP_NUMB_MAX)
     228  #define mpn_divexact_by11(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,11,BINVERT_11,0)
     229  #else
     230  #define mpn_divexact_by11(dst,src,size) mpn_divexact_1(dst,src,size,11)
     231  #endif
     232  #endif
     233  
     234  #ifndef mpn_divexact_by13
     235  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
     236  #define BINVERT_13 \
     237    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 12)) / 13 * 3 << 14) + 3781) & GMP_NUMB_MAX)
     238  #define mpn_divexact_by13(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,13,BINVERT_13,0)
     239  #else
     240  #define mpn_divexact_by13(dst,src,size) mpn_divexact_1(dst,src,size,13)
     241  #endif
     242  #endif
     243  
     244  #ifndef mpn_divexact_by17
     245  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
     246  #define BINVERT_17 \
     247    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 8)) / 17 * 15 << 7) + 113) & GMP_NUMB_MAX)
     248  #define mpn_divexact_by17(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,17,BINVERT_17,0)
     249  #else
     250  #define mpn_divexact_by17(dst,src,size) mpn_divexact_1(dst,src,size,17)
     251  #endif
     252  #endif
     253  
     254  /* Thanks to Chinese remainder theorem, store
     255     in {rp, k*n+1} the value mod (B^(k*n)+1), given
     256     {ap, k*n+1} mod ((B^(k*n)+1)/(B^n+1)) and
     257     {bp, n+1} mod (B^n+1) .
     258     {tp, n+1} is a scratch area.
     259     tp == rp or rp == ap are possible.
     260  */
     261  static void
     262  _mpn_crt (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
     263  	  mp_size_t n, unsigned k, mp_ptr tp)
     264  {
     265    mp_limb_t mod;
     266    unsigned i;
     267  
     268  #if MOD_BKNP1_ONLY3
     269    ASSERT (k == 3);
     270    k = 3;
     271  #endif
     272    _mpn_modbnp1_kn (tp, ap, n, k);
     273    if (mpn_sub_n (tp, bp, tp, n + 1))
     274      _mpn_modbnp1_neg_ip (tp, n, tp[n]);
     275  
     276  #if MOD_BKNP1_USE11
     277    if (UNLIKELY (k == 11))
     278      {
     279        ASSERT (GMP_NUMB_BITS % 2 == 0);
     280        /* mod <- -Mod(B^n+1,11)^-1 */
     281        mod = n * (GMP_NUMB_BITS % 5) % 5;
     282        if ((mod > 2) || UNLIKELY (mod == 0))
     283  	mod += 5;
     284  
     285        mod *= mpn_mod_1 (tp, n + 1, 11);
     286      }
     287    else
     288  #endif
     289      {
     290  #if GMP_NUMB_BITS % 8 == 0
     291    /* (2^6 - 1) | (2^{GMP_NUMB_BITS*3/4} - 1)	*/
     292    /* (2^6 - 1) = 3^2 * 7			*/
     293    mod = mpn_mod_34lsub1 (tp, n + 1);
     294    ASSERT ((GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2)) % k == 0);
     295    /* (2^12 - 1) = 3^2 * 5 * 7 * 13		*/
     296    /* (2^24 - 1) = 3^2 * 5 * 7 * 13 * 17 * 241	*/
     297    ASSERT (k == 3 || k == 5 || k == 7 || k == 13 || k == 17);
     298  
     299  #if GMP_NUMB_BITS % 3 != 0
     300    if (UNLIKELY (k != 3))
     301      {
     302        ASSERT ((GMP_NUMB_MAX % k == 0) || (n % 3 != 0));
     303        if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
     304  	mod <<= 1; /* k >> 1 = 1 << 1 */
     305        else if ((GMP_NUMB_BITS % 16 != 0) || LIKELY (k == 7))
     306  	mod <<= (n << (GMP_NUMB_BITS % 3 >> 1)) % 3;
     307        else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
     308  	mod *= ((n << (GMP_NUMB_BITS % 3 >> 1)) % 3 == 1) ? 3 : 9;
     309        else /* k == 17 */
     310  	mod <<= 3; /* k >> 1 = 1 << 3 */
     311  #if 0
     312        if ((GMP_NUMB_BITS == 8) /* && (k == 7) */ ||
     313  	  (GMP_NUMB_BITS == 16) && (k == 13))
     314  	mod = ((mod & (GMP_NUMB_MAX >> (GMP_NUMB_BITS >> 2))) +
     315  	       (mod >> (3 * GMP_NUMB_BITS >> 2)));
     316  #endif
     317      }
     318  #else
     319    ASSERT (GMP_NUMB_MAX % k == 0);
     320    /* 2^{GMP_NUMB_BITS} - 1	= 0 (mod k) */
     321    /* 2^{GMP_NUMB_BITS}		= 1 (mod k) */
     322    /* 2^{n*GMP_NUMB_BITS} + 1	= 2 (mod k) */
     323    /* -2^{-1}	= k >> 1 (mod k) */
     324    mod *= k >> 1;
     325  #endif
     326  #else
     327    ASSERT_ALWAYS (k == 0); /* Not implemented, should not be used. */
     328  #endif
     329      }
     330  
     331    MPN_INCR_U (tp, n + 1, mod);
     332    tp[n] += mod;
     333  
     334    if (LIKELY (k == 3))
     335      ASSERT_NOCARRY (mpn_divexact_by3 (tp, tp, n + 1));
     336    else if ((GMP_NUMB_BITS % 16 == 0) && LIKELY (k == 5))
     337      mpn_divexact_by5 (tp, tp, n + 1);
     338    else if (((! MOD_BKNP1_USE11) && (GMP_NUMB_BITS % 16 != 0))
     339  	   || LIKELY (k == 7))
     340      mpn_divexact_by7 (tp, tp, n + 1);
     341  #if MOD_BKNP1_USE11
     342    else if (k == 11)
     343      mpn_divexact_by11 (tp, tp, n + 1);
     344  #endif
     345    else if ((GMP_NUMB_BITS % 32 != 0) || LIKELY (k == 13))
     346      mpn_divexact_by13 (tp, tp, n + 1);
     347    else /* (k == 17) */
     348      mpn_divexact_by17 (tp, tp, n + 1);
     349  
     350    rp += k * n;
     351    ap += k * n; /* tp - 1 */
     352  
     353    rp -= n;
     354    ap -= n;
     355    ASSERT_NOCARRY (mpn_add_n (rp, ap, tp, n + 1));
     356  
     357    i = k >> 1;
     358    do
     359     {
     360        mp_limb_t cy, bw;
     361        rp -= n;
     362        ap -= n;
     363        bw = mpn_sub_n (rp, ap, tp, n) + tp[n];
     364        MPN_DECR_U (rp + n, (k - i * 2) * n + 1, bw);
     365        rp -= n;
     366        ap -= n;
     367        cy = mpn_add_n (rp, ap, tp, n) + tp[n];
     368        MPN_INCR_U (rp + n, (k - i * 2 + 1) * n + 1, cy);
     369      }
     370    while (--i != 0);
     371  
     372    /* if (LIKELY (rp[k * n])) */
     373      _mpn_modbnp1_pn_ip (rp, k * n, rp[k * n]);
     374  }
     375  
     376  
     377  static void
     378  _mpn_mulmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
     379  		    mp_ptr tp)
     380  {
     381    mp_limb_t cy;
     382    unsigned k;
     383  
     384    ASSERT (0 < rn);
     385    ASSERT ((ap[rn] | bp[rn]) <= 1);
     386  
     387    if (UNLIKELY (ap[rn] | bp[rn]))
     388      {
     389        if (ap[rn])
     390  	cy = bp[rn] + mpn_neg (rp, bp, rn);
     391        else /* ap[rn] == 0 */
     392  	cy = mpn_neg (rp, ap, rn);
     393      }
     394    else if (MPN_MULMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
     395      {
     396        rn /= k;
     397        mpn_mulmod_bknp1 (rp, ap, bp, rn, k, tp);
     398        return;
     399      }
     400    else
     401      {
     402        mpn_mul_n (tp, ap, bp, rn);
     403        cy = mpn_sub_n (rp, tp, tp + rn, rn);
     404      }
     405    rp[rn] = 0;
     406    MPN_INCR_U (rp, rn + 1, cy);
     407  }
     408  
     409  /* {rp, kn + 1} = {ap, kn + 1} * {bp, kn + 1} % (B^kn + 1) */
     410  /* tp must point to at least 4*(k-1)*n+1 limbs*/
     411  void
     412  mpn_mulmod_bknp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
     413  		  mp_size_t n, unsigned k, mp_ptr tp)
     414  {
     415    mp_ptr hp;
     416  
     417  #if MOD_BKNP1_ONLY3
     418    ASSERT (k == 3);
     419    k = 3;
     420  #endif
     421    ASSERT (k > 2);
     422    ASSERT (k % 2 == 1);
     423  
     424    /* a % (B^{nn}+1)/(B^{nn/k}+1) */
     425    _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
     426    /* b % (B^{nn}+1)/(B^{nn/k}+1) */
     427    _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 3, bp, n, k);
     428    mpn_mul_n (tp, tp + (k - 1) * n * 2, tp + (k - 1) * n * 3, (k - 1) * n);
     429    _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
     430  
     431    hp = tp + k * n + 1;
     432    /* a % (B^{nn/k}+1) */
     433    ASSERT (ap[k * n] <= 1);
     434    _mpn_modbnp1_kn (hp, ap, n, k);
     435    /* b % (B^{nn/k}+1) */
     436    ASSERT (bp[k * n] <= 1);
     437    _mpn_modbnp1_kn (hp + n + 1, bp, n, k);
     438    _mpn_mulmod_bnp1_tp (hp + (n + 1) * 2, hp, hp + n + 1, n, hp + (n + 1) * 2);
     439  
     440    _mpn_crt (rp, tp, hp + (n + 1) * 2, n, k, hp);
     441  }
     442  
     443  
     444  static void
     445  _mpn_sqrmod_bnp1_tp (mp_ptr rp, mp_srcptr ap, mp_size_t rn,
     446  		    mp_ptr tp)
     447  {
     448    mp_limb_t cy;
     449    unsigned k;
     450  
     451    ASSERT (0 < rn);
     452  
     453    if (UNLIKELY (ap[rn]))
     454      {
     455        ASSERT (ap[rn] == 1);
     456        *rp = 1;
     457        MPN_FILL (rp + 1, rn, 0);
     458        return;
     459      }
     460    else if (MPN_SQRMOD_BKNP1_USABLE(rn, k, MUL_FFT_MODF_THRESHOLD / 3))
     461      {
     462        rn /= k;
     463        mpn_sqrmod_bknp1 (rp, ap, rn, k, tp);
     464        return;
     465      }
     466    else
     467      {
     468        mpn_sqr (tp, ap, rn);
     469        cy = mpn_sub_n (rp, tp, tp + rn, rn);
     470      }
     471    rp[rn] = 0;
     472    MPN_INCR_U (rp, rn + 1, cy);
     473  }
     474  
     475  /* {rp, kn + 1} = {ap, kn + 1}^2 % (B^kn + 1) */
     476  /* tp must point to at least 3*(k-1)*n+1 limbs*/
     477  void
     478  mpn_sqrmod_bknp1 (mp_ptr rp, mp_srcptr ap,
     479  		  mp_size_t n, unsigned k, mp_ptr tp)
     480  {
     481    mp_ptr hp;
     482  
     483  #if MOD_BKNP1_ONLY3
     484    ASSERT (k == 3);
     485    k = 3;
     486  #endif
     487    ASSERT (k > 2);
     488    ASSERT (k % 2 == 1);
     489  
     490    /* a % (B^{nn}+1)/(B^{nn/k}+1) */
     491    _mpn_modbknp1dbnp1_n (tp + (k - 1) * n * 2, ap, n, k);
     492    mpn_sqr (tp, tp + (k - 1) * n * 2, (k - 1) * n);
     493    _mpn_modbnp1 (tp, k * n, tp, (k - 1) * n * 2);
     494  
     495    hp = tp + k * n + 1;
     496    /* a % (B^{nn/k}+1) */
     497    ASSERT (ap[k * n] <= 1);
     498    _mpn_modbnp1_kn (hp, ap, n, k);
     499    _mpn_sqrmod_bnp1_tp (hp + (n + 1), hp, n, hp + (n + 1));
     500  
     501    _mpn_crt (rp, tp, hp + (n + 1), n, k, hp);
     502  }