gmp-6.3.0/mpn/generic/mod_1_3.c
/* mpn_mod_1s_3p (ap, n, b, cps)
   Divide {ap, n} by b.  Return the single-limb remainder.
   Requires that b < B / 3.

   Contributed to the GNU project by Torbjorn Granlund.
   Based on a suggestion by Peter L. Montgomery.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008-2010, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"
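
/* Precomputation/evaluation scheme (editorial summary): mpn_mod_1s_3p_cps
   fills cps[] = { bi, cnt, B mod b, B^2 mod b, B^3 mod b, B^4 mod b }, where
   cnt is the number of leading zero bits of b, bi is the reciprocal of the
   normalized b << cnt, B = 2^GMP_LIMB_BITS is the limb base, and cps[2] may
   be left not fully reduced.  mpn_mod_1s_3p then reduces {ap, n} three limbs
   per iteration using those residues.  A hypothetical driver, patterned
   after how mod_1.c calls the sibling mod_1s_2p/mod_1s_4p routines
   (illustration only; these are internal, undocumented interfaces):

     mp_limb_t pre[6], r;
     mpn_mod_1s_3p_cps (pre, b);                    (requires b < B/3)
     r = mpn_mod_1s_3p (ap, n, b << pre[1], pre);   (divisor passed pre-shifted)
*/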

void
mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb, B4modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 3);

  count_leading_zeros (cnt, b);

  b <<= cnt;
  invert_limb (bi, b);

  cps[0] = bi;
  cps[1] = cnt;

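  /* The next line computes B * 2^cnt mod b, i.e. (B mod b0) << cnt, where b0
     is the divisor as passed in, before the normalizing shift above: the
     parenthesized factor recovers the high cnt+1 bits of B + bi, which equal
     floor (B / b0), and negating b times that quotient leaves the remainder.
     When b0 divides B the result is b rather than 0, which is why the ASSERT
     below says "not fully reduced".  */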
  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
  cps[2] = B1modb >> cnt;

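  /* Each udiv_rnnd_preinv step multiplies the previous residue by B and
     reduces it mod b (all values kept shifted left by cnt), turning
     B^k * 2^cnt mod b into B^(k+1) * 2^cnt mod b; cps[3..5] store the
     unshifted residues B^2, B^3 and B^4 mod the original divisor.  */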
  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
  cps[3] = B2modb >> cnt;

  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
  cps[4] = B3modb >> cnt;

  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
  cps[5] = B4modb >> cnt;

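  /* Check that the four unshifted residues sum to at most B-1, i.e. that the
     additions below never wrap; mpn_mod_1s_3p relies on this to keep its
     two-limb accumulator from overflowing.  */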
#if WANT_ASSERT
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 5; i++)
      {
        b += cps[i];
        ASSERT (b >= cps[i]);
      }
  }
#endif
}

mp_limb_t
mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
{
  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb, B4modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];
  B4modb = cps[5];

  /* We compute n mod 3 in a tricky way, which works except when n is so
     close to the maximum size that we don't need to support it.  The final
     cast to int is a workaround for HP cc.  */
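  /* Multiplying by MODLIMB_INVERSE_3, the inverse of 3 mod B, maps n = 3k to
     k, n = 3k+1 to roughly k + 2B/3, and n = 3k+2 to roughly k + B/3, so for
     any supported n the two bits extracted by the shift are 0, 2 and 1
     respectively, selecting the cases below.  */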
  switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
    {
    case 0:
      umul_ppmm (ph, pl, ap[n - 2], B1modb);
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
      umul_ppmm (rh, rl, ap[n - 1], B2modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
      n -= 3;
      break;
    default:	/* n mod 3 = 1; (case 2) */
      rh = 0;
      rl = ap[--n];
      break;
    case 1:	/* n mod 3 = 2 */
      rh = ap[n - 1];
      rl = ap[n - 2];
      n -= 2;
      break;
    }

  for (i = n - 3; i >= 0; i -= 3)
    {
      /* rr = ap[i]                             < B
	    + ap[i+1] * (B mod b)               <= (B-1)(b-1)
	    + ap[i+2] * (B^2 mod b)             <= (B-1)(b-1)
	    + LO(rr)  * (B^3 mod b)             <= (B-1)(b-1)
	    + HI(rr)  * (B^4 mod b)             <= (B-1)(b-1)
      */
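      /* Since the four residues sum to at most B-1 (checked in the _cps
	 function under WANT_ASSERT), the whole sum stays below B^2, so the
	 two-limb accumulation below cannot overflow.  */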
      umul_ppmm (ph, pl, ap[i + 1], B1modb);
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);

      umul_ppmm (ch, cl, ap[i + 2], B2modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (ch, cl, rl, B3modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (rh, rl, rh, B4modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
    }

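  /* One more folding step: replace rh * B by rh * (B mod b), leaving a
     two-limb value congruent to the input mod b whose high limb is small
     enough for the single pre-inverted division below.  */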
  umul_ppmm (rh, cl, rh, B1modb);
  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);

  cnt = cps[1];
  bi = cps[0];

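  /* Shift the two-limb value up by cnt to match the normalized divisor,
     reduce it with the precomputed reciprocal, and shift the remainder back
     down to obtain the remainder with respect to the unshifted divisor.  */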
  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);

  return r >> cnt;
}