1  /* Implementation of the algorithm for Toom-Cook 4.5-way.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009, 2012 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  /* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
      41  static int
      42  abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
      43  {
      44    mp_limb_t  x, y;
      45    while (--n >= 0)
      46      {
      47        x = ap[n];
      48        y = bp[n];
      49        if (x != y)
      50  	{
      51  	  n++;
      52  	  if (x > y)
      53  	    {
      54  	      mpn_sub_n (rp, ap, bp, n);
      55  	      return 0;
      56  	    }
      57  	  else
      58  	    {
      59  	      mpn_sub_n (rp, bp, ap, n);
      60  	      return ~0;
      61  	    }
      62  	}
      63        rp[n] = 0;
      64      }
      65    return 0;
      66  }
      67  
      68  static int
      69  abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
      70    int result;
      71    result = abs_sub_n (rm, rp, rs, n);
      72    ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
      73    return result;
      74  }
      75  
      76  
      77  /* Toom-4.5, the splitting 6x3 unbalanced version.
      78     Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
      79  
      80    <--s-><--n--><--n--><--n--><--n--><--n-->
      81     ____ ______ ______ ______ ______ ______
      82    |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
      83  			|b2_|__b1__|__b0__|
      84  			<-t-><--n--><--n-->
      85  
      86  */
      87  #define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
      88    do {	mpn_mul_n (p, a, b, n);				\
      89    } while (0)
      90  
      91  #define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
      92    do {	mpn_mul (p, a, na, b, nb);			\
      93    } while (0)
      94  
      95  void
      96  mpn_toom63_mul (mp_ptr pp,
      97  		mp_srcptr ap, mp_size_t an,
      98  		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
      99  {
     100    mp_size_t n, s, t;
     101    mp_limb_t cy;
     102    int sign;
     103  
     104    /***************************** decomposition *******************************/
     105  #define a5  (ap + 5 * n)
     106  #define b0  (bp + 0 * n)
     107  #define b1  (bp + 1 * n)
     108  #define b2  (bp + 2 * n)
     109  
     110    ASSERT (an >= bn);
     111    n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
     112  
     113    s = an - 5 * n;
     114    t = bn - 2 * n;
     115  
     116    ASSERT (0 < s && s <= n);
     117    ASSERT (0 < t && t <= n);
     118    /* WARNING! it assumes s+t>=n */
     119    ASSERT ( s + t >= n );
     120    ASSERT ( s + t > 4);
     121    /* WARNING! it assumes n>1 */
     122    ASSERT ( n > 2);
     123  
     124  #define   r8    pp				/* 2n   */
     125  #define   r7    scratch				/* 3n+1 */
     126  #define   r5    (pp + 3*n)			/* 3n+1 */
     127  #define   v0    (pp + 3*n)			/* n+1 */
     128  #define   v1    (pp + 4*n+1)			/* n+1 */
     129  #define   v2    (pp + 5*n+2)			/* n+1 */
     130  #define   v3    (pp + 6*n+3)			/* n+1 */
     131  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
     132  #define   r1    (pp + 7*n)			/* s+t <= 2*n */
     133  #define   ws    (scratch + 6 * n + 2)		/* ??? */
     134  
     135    /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
     136       need all of them, when DO_mpn_sublsh_n usea a scratch  */
     137  /*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
     138  
     139    /********************** evaluation and recursive calls *********************/
     140    /* $\pm4$ */
     141    sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
     142    pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
     143    /* FIXME: use addlsh */
     144    v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
     145    if ( n == t )
     146      v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
     147    else
     148      v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
     149    sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
     150    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
     151    TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
     152    mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
     153  
     154    /* $\pm1$ */
     155    sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
     156    /* Compute bs1 and bsm1. Code taken from toom33 */
     157    cy = mpn_add (ws, b0, n, b2, t);
     158  #if HAVE_NATIVE_mpn_add_n_sub_n
     159    if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
     160      {
     161        cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
     162        v3[n] = cy >> 1;
     163        v1[n] = 0;
     164        sign = ~sign;
     165      }
     166    else
     167      {
     168        mp_limb_t cy2;
     169        cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
     170        v3[n] = cy + (cy2 >> 1);
     171        v1[n] = cy - (cy2 & 1);
     172      }
     173  #else
     174    v3[n] = cy + mpn_add_n (v3, ws, b1, n);
     175    if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
     176      {
     177        mpn_sub_n (v1, b1, ws, n);
     178        v1[n] = 0;
     179        sign = ~sign;
     180      }
     181    else
     182      {
     183        cy -= mpn_sub_n (v1, ws, b1, n);
     184        v1[n] = cy;
     185      }
     186  #endif
     187    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
     188    TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
     189    mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
     190  
     191    /* $\pm2$ */
     192    sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
     193    pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
     194    /* FIXME: use addlsh or addlsh2 */
     195    v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
     196    if ( n == t )
     197      v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
     198    else
     199      v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
     200    sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
     201    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
     202    TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
     203    mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
     204  
     205    /* A(0)*B(0) */
     206    TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
     207  
     208    /* Infinity */
     209    if (s > t) {
     210      TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
     211    } else {
     212      TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
     213    };
     214  
     215    mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
     216  
     217  #undef a5
     218  #undef b0
     219  #undef b1
     220  #undef b2
     221  #undef r1
     222  #undef r3
     223  #undef r5
     224  #undef v0
     225  #undef v1
     226  #undef v2
     227  #undef v3
     228  #undef r7
     229  #undef r8
     230  #undef ws
     231  }