1  /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  #include "gmp-impl.h"
      38  
      39  #define BINVERT_3 MODLIMB_INVERSE_3
      40  
      41  #define BINVERT_15 \
      42    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
      43  
      44  #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
      45  
      46  #ifndef mpn_divexact_by3
      47  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
      48  #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
      49  #else
      50  #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
      51  #endif
      52  #endif
      53  
      54  #ifndef mpn_divexact_by45
      55  #if GMP_NUMB_BITS % 12 == 0
      56  #define mpn_divexact_by45(dst,src,size) \
      57    (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
      58  #else
      59  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
      60  #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
      61  #else
      62  #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
      63  #endif
      64  #endif
      65  #endif
      66  
      67  #if HAVE_NATIVE_mpn_sublsh2_n_ip1
      68  #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
      69  #else
      70  #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
      71  #endif
      72  
      73  #if HAVE_NATIVE_mpn_sublsh_n
      74  #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
      75  #else
      76  static mp_limb_t
      77  DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
      78  {
      79  #if USE_MUL_1 && 0
      80    return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
      81  #else
      82    mp_limb_t __cy;
      83    __cy = mpn_lshift (ws,src,n,s);
      84    return __cy + mpn_sub_n (dst,dst,ws,n);
      85  #endif
      86  }
      87  #endif
      88  
      89  
      90  #if HAVE_NATIVE_mpn_subrsh
      91  #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
      92  #else
      93  /* This is not a correct definition, it assumes no carry */
      94  #define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
      95  do {									\
      96    mp_limb_t __cy;							\
      97    MPN_DECR_U (dst, nd, src[0] >> s);					\
      98    __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
      99    MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
     100  } while (0)
     101  #endif
     102  
     103  /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
     104     points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
     105     we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
     106     degree 7 (or 6), given the 8 (rsp. 7) values:
     107  
     108       r1 = limit at infinity of f(x) / x^7,
     109       r2 = f(4),
     110       r3 = f(-4),
     111       r4 = f(2),
     112       r5 = f(-2),
     113       r6 = f(1),
     114       r7 = f(-1),
     115       r8 = f(0).
     116  
     117     All couples of the form f(n),f(-n) must be already mixed with
     118     toom_couple_handling(f(n),...,f(-n),...)
     119  
     120     The result is stored in {pp, spt + 7*n (or 6*n)}.
     121     At entry, r8 is stored at {pp, 2n},
     122     r5 is stored at {pp + 3n, 3n + 1}.
     123  
     124     The other values are 2n+... limbs each (with most significant limbs small).
     125  
     126     All intermediate results are positive.
     127     Inputs are destroyed.
     128  */
     129  
     130  void
     131  mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
     132  			   mp_ptr r3, mp_ptr r7,
     133  			   mp_size_t spt, mp_ptr ws)
     134  {
     135    mp_limb_signed_t cy;
     136    mp_ptr r5, r1;
     137    r5 = (pp + 3 * n);			/* 3n+1 */
     138    r1 = (pp + 7 * n);			/* spt */
     139  
     140    /******************************* interpolation *****************************/
     141  
     142    DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
     143    cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
     144    MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
     145  
     146    DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
     147    cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
     148    MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
     149  
     150    r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
     151    cy = mpn_sub_n (r7, r7, r1, spt);
     152    MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
     153  
     154    ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
     155    ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
     156  
     157    ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
     158  
     159    ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
     160  
     161    mpn_divexact_by45 (r3, r3, 3 * n + 1);
     162  
     163    ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
     164  
     165    ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
     166  
     167    /* last interpolation steps... */
     168    /* ... are mixed with recomposition */
     169  
     170    /***************************** recomposition *******************************/
     171    /*
     172      pp[] prior to operations:
     173       |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
     174  
     175      summation scheme for remaining operations:
     176       |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
     177       |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
     178  	  ||_H r3|_M r3|_L*r3|
     179  				  ||_H_r7|_M_r7|_L_r7|
     180  		      ||-H r3|-M r3|-L*r3|
     181  				  ||-H*r5|-M_r5|-L_r5|
     182    */
     183  
     184    cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
     185    cy-= mpn_sub_n (pp + n, pp + n, r5, n);
     186    if (cy > 0) {
     187      MPN_INCR_U (r7 + n, 2*n + 1, 1);
     188      cy = 0;
     189    }
     190  
     191    cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
     192    MPN_DECR_U (r7 + 2*n, n + 1, cy);
     193  
     194    cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
     195    r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
     196    cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
     197    if (UNLIKELY(0 > cy))
     198      MPN_DECR_U (r5 + n + 1, 2*n, 1);
     199    else
     200      MPN_INCR_U (r5 + n + 1, 2*n, cy);
     201  
     202    ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
     203  
     204    cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
     205    MPN_INCR_U (r3 + 2*n, n + 1, cy);
     206    cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
     207    if (LIKELY(spt != n))
     208      MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
     209    else
     210      ASSERT (r3[3*n] + cy == 0);
     211  }