1  /* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
       2     times as large as bn.  Or more accurately, bn < an < 2 bn.
       3  
       4     Contributed to the GNU project by Marco Bodrato.
       5  
       6     The idea of applying toom to unbalanced multiplication is due to Marco
       7     Bodrato and Alberto Zanoni.
       8  
       9     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      10     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      11     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      12  
      13  Copyright 2009 Free Software Foundation, Inc.
      14  
      15  This file is part of the GNU MP Library.
      16  
      17  The GNU MP Library is free software; you can redistribute it and/or modify
      18  it under the terms of either:
      19  
      20    * the GNU Lesser General Public License as published by the Free
      21      Software Foundation; either version 3 of the License, or (at your
      22      option) any later version.
      23  
      24  or
      25  
      26    * the GNU General Public License as published by the Free Software
      27      Foundation; either version 2 of the License, or (at your option) any
      28      later version.
      29  
      30  or both in parallel, as here.
      31  
      32  The GNU MP Library is distributed in the hope that it will be useful, but
      33  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      34  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      35  for more details.
      36  
      37  You should have received copies of the GNU General Public License and the
      38  GNU Lesser General Public License along with the GNU MP Library.  If not,
      39  see https://www.gnu.org/licenses/.  */
      40  
      41  
      42  #include "gmp-impl.h"
      43  
      44  /* Evaluate in: -2, -1, 0, +1, +2, +inf
      45  
      46    <-s-><--n--><--n--><--n--><--n-->
      47     ___ ______ ______ ______ ______
      48    |a4_|___a3_|___a2_|___a1_|___a0_|
      49  			|b1|___b0_|
      50  			<t-><--n-->
      51  
      52    v0  =  a0                  * b0      #   A(0)*B(0)
      53    v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
      54    vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
      55    v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
      56    vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
      57    vinf=                   a4 *     b1  # A(inf)*B(inf)
      58  
      59    Some slight optimization in evaluation are taken from the paper:
      60    "Towards Optimal Toom-Cook Multiplication for Univariate and
      61    Multivariate Polynomials in Characteristic 2 and 0."
      62  */
      63  
      64  void
      65  mpn_toom52_mul (mp_ptr pp,
      66  		mp_srcptr ap, mp_size_t an,
      67  		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
      68  {
      69    mp_size_t n, s, t;
      70    enum toom6_flags flags;
      71  
      72  #define a0  ap
      73  #define a1  (ap + n)
      74  #define a2  (ap + 2 * n)
      75  #define a3  (ap + 3 * n)
      76  #define a4  (ap + 4 * n)
      77  #define b0  bp
      78  #define b1  (bp + n)
      79  
      80    n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
      81  
      82    s = an - 4 * n;
      83    t = bn - n;
      84  
      85    ASSERT (0 < s && s <= n);
      86    ASSERT (0 < t && t <= n);
      87  
      88    /* Ensures that 5 values of n+1 limbs each fits in the product area.
      89       Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
      90       n = 8. */
      91    ASSERT (s+t >= 5);
      92  
      93  #define v0    pp				/* 2n */
      94  #define vm1   (scratch)				/* 2n+1 */
      95  #define v1    (pp + 2 * n)			/* 2n+1 */
      96  #define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
      97  #define v2    (scratch + 4 * n + 2)		/* 2n+1 */
      98  #define vinf  (pp + 5 * n)			/* s+t */
      99  #define bs1    pp				/* n+1 */
     100  #define bsm1  (scratch + 2 * n + 2)		/* n   */
     101  #define asm1  (scratch + 3 * n + 3)		/* n+1 */
     102  #define asm2  (scratch + 4 * n + 4)		/* n+1 */
     103  #define bsm2  (pp + n + 1)			/* n+1 */
     104  #define bs2   (pp + 2 * n + 2)			/* n+1 */
     105  #define as2   (pp + 3 * n + 3)			/* n+1 */
     106  #define as1   (pp + 4 * n + 4)			/* n+1 */
     107  
     108    /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
     109       products will overwrite 2n+2 limbs. */
     110  
     111  #define a0a2  scratch
     112  #define a1a3  asm1
     113  
     114    /* Compute as2 and asm2.  */
     115    flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
     116  
     117    /* Compute bs1 and bsm1.  */
     118    if (t == n)
     119      {
     120  #if HAVE_NATIVE_mpn_add_n_sub_n
     121        mp_limb_t cy;
     122  
     123        if (mpn_cmp (b0, b1, n) < 0)
     124  	{
     125  	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
     126  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
     127  	}
     128        else
     129  	{
     130  	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
     131  	}
     132        bs1[n] = cy >> 1;
     133  #else
     134        bs1[n] = mpn_add_n (bs1, b0, b1, n);
     135        if (mpn_cmp (b0, b1, n) < 0)
     136  	{
     137  	  mpn_sub_n (bsm1, b1, b0, n);
     138  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
     139  	}
     140        else
     141  	{
     142  	  mpn_sub_n (bsm1, b0, b1, n);
     143  	}
     144  #endif
     145      }
     146    else
     147      {
     148        bs1[n] = mpn_add (bs1, b0, n, b1, t);
     149        if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
     150  	{
     151  	  mpn_sub_n (bsm1, b1, b0, t);
     152  	  MPN_ZERO (bsm1 + t, n - t);
     153  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
     154  	}
     155        else
     156  	{
     157  	  mpn_sub (bsm1, b0, n, b1, t);
     158  	}
     159      }
     160  
     161    /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
     162    mpn_add (bs2, bs1, n+1, b1, t);
     163    if (flags & toom6_vm1_neg)
     164      {
     165        bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
     166        flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
     167      }
     168    else
     169      {
     170        bsm2[n] = 0;
     171        if (t == n)
     172  	{
     173  	  if (mpn_cmp (bsm1, b1, n) < 0)
     174  	    {
     175  	      mpn_sub_n (bsm2, b1, bsm1, n);
     176  	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
     177  	    }
     178  	  else
     179  	    {
     180  	      mpn_sub_n (bsm2, bsm1, b1, n);
     181  	    }
     182  	}
     183        else
     184  	{
     185  	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
     186  	    {
     187  	      mpn_sub_n (bsm2, b1, bsm1, t);
     188  	      MPN_ZERO (bsm2 + t, n - t);
     189  	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
     190  	    }
     191  	  else
     192  	    {
     193  	      mpn_sub (bsm2, bsm1, n, b1, t);
     194  	    }
     195  	}
     196      }
     197  
     198    /* Compute as1 and asm1.  */
     199    flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2)));
     200  
     201    ASSERT (as1[n] <= 4);
     202    ASSERT (bs1[n] <= 1);
     203    ASSERT (asm1[n] <= 2);
     204  /*   ASSERT (bsm1[n] <= 1); */
     205    ASSERT (as2[n] <=30);
     206    ASSERT (bs2[n] <= 2);
     207    ASSERT (asm2[n] <= 20);
     208    ASSERT (bsm2[n] <= 1);
     209  
     210    /* vm1, 2n+1 limbs */
     211    mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
     212  
     213    /* vm2, 2n+1 limbs */
     214    mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
     215  
     216    /* v2, 2n+1 limbs */
     217    mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
     218  
     219    /* v1, 2n+1 limbs */
     220    mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
     221  
     222    /* vinf, s+t limbs */   /* W0 */
     223    if (s > t)  mpn_mul (vinf, a4, s, b1, t);
     224    else        mpn_mul (vinf, b1, t, a4, s);
     225  
     226    /* v0, 2n limbs */
     227    mpn_mul_n (v0, ap, bp, n);  /* W5 */
     228  
     229    mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
     230  
     231  #undef v0
     232  #undef vm1
     233  #undef v1
     234  #undef vm2
     235  #undef v2
     236  #undef vinf
     237  #undef bs1
     238  #undef bs2
     239  #undef bsm1
     240  #undef bsm2
     241  #undef asm1
     242  #undef asm2
     243  #undef as1
     244  #undef as2
     245  #undef a0a2
     246  #undef b0b2
     247  #undef a1a3
     248  #undef a0
     249  #undef a1
     250  #undef a2
     251  #undef a3
     252  #undef b0
     253  #undef b1
     254  #undef b2
     255  
     256  }