(root)/
gmp-6.3.0/
mpn/
generic/
toom62_mul.c
       1  /* mpn_toom62_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 3 times
       2     as large as bn.  Or more accurately, (5/2)bn < an < 6bn.
       3  
       4     Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
       5  
       6     The idea of applying toom to unbalanced multiplication is due to Marco
       7     Bodrato and Alberto Zanoni.
       8  
       9     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      10     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      11     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      12  
      13  Copyright 2006-2008, 2012 Free Software Foundation, Inc.
      14  
      15  This file is part of the GNU MP Library.
      16  
      17  The GNU MP Library is free software; you can redistribute it and/or modify
      18  it under the terms of either:
      19  
      20    * the GNU Lesser General Public License as published by the Free
      21      Software Foundation; either version 3 of the License, or (at your
      22      option) any later version.
      23  
      24  or
      25  
      26    * the GNU General Public License as published by the Free Software
      27      Foundation; either version 2 of the License, or (at your option) any
      28      later version.
      29  
      30  or both in parallel, as here.
      31  
      32  The GNU MP Library is distributed in the hope that it will be useful, but
      33  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      34  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      35  for more details.
      36  
      37  You should have received copies of the GNU General Public License and the
      38  GNU Lesser General Public License along with the GNU MP Library.  If not,
      39  see https://www.gnu.org/licenses/.  */
      40  
      41  
      42  #include "gmp-impl.h"
      43  
      44  /* Evaluate in:
      45     0, +1, -1, +2, -2, 1/2, +inf
      46  
      47    <-s-><--n--><--n--><--n--><--n--><--n-->
      48     ___ ______ ______ ______ ______ ______
      49    |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
      50  			     |_b1_|___b0_|
      51  			     <-t--><--n-->
      52  
      53    v0  =    a0                       *   b0      #    A(0)*B(0)
      54    v1  = (  a0+  a1+ a2+ a3+  a4+  a5)*( b0+ b1) #    A(1)*B(1)      ah  <= 5   bh <= 1
      55    vm1 = (  a0-  a1+ a2- a3+  a4-  a5)*( b0- b1) #   A(-1)*B(-1)    |ah| <= 2   bh  = 0
      56    v2  = (  a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) #    A(2)*B(2)      ah  <= 62  bh <= 2
      57    vm2 = (  a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) #   A(-2)*B(-2)    -41<=ah<=20 -1<=bh<=0
      58    vh  = (32a0+16a1+8a2+4a3+ 2a4+  a5)*(2b0+ b1) #  A(1/2)*B(1/2)    ah  <= 62  bh <= 2
      59    vinf=                           a5 *      b1  #  A(inf)*B(inf)
      60  */
      61  
/* Multiply {ap,an} by {bp,bn} and write the an+bn limb product to {pp}.
   A is split into six n-limb blocks a0..a5 (a5 has s <= n limbs) and B
   into two blocks b0, b1 (b1 has t <= n limbs).  scratch must provide
   10*n + 5 limbs (see the #define layout further down).  */
void
mpn_toom62_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;	/* block size; sizes of the top blocks a5 and b1 */
  mp_limb_t cy;
  /* Evaluations of A resp. B at the points 1, -1, 2, -2, 1/2 (each n+1
     limbs, except bsm1 which needs only n since |B(-1)| fits in n limbs;
     see the bh bounds in the table above).  */
  mp_ptr as1, asm1, as2, asm2, ash;
  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
  mp_ptr gp;		/* scratch for the toom_eval helpers; aliases pp */
  enum toom7_flags aflags, bflags;	/* signs of the odd-point values */
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define a5  (ap + 5*n)
#define b0  bp
#define b1  (bp + n)

  /* Choose n so that an <= 6n and bn <= 2n, with both top blocks
     nonempty (asserted below).  */
  n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);

  s = an - 5 * n;
  t = bn - n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  as1 = TMP_SALLOC_LIMBS (n + 1);
  asm1 = TMP_SALLOC_LIMBS (n + 1);
  as2 = TMP_SALLOC_LIMBS (n + 1);
  asm2 = TMP_SALLOC_LIMBS (n + 1);
  ash = TMP_SALLOC_LIMBS (n + 1);

  bs1 = TMP_SALLOC_LIMBS (n + 1);
  bsm1 = TMP_SALLOC_LIMBS (n);
  bs2 = TMP_SALLOC_LIMBS (n + 1);
  bsm2 = TMP_SALLOC_LIMBS (n + 1);
  bsh = TMP_SALLOC_LIMBS (n + 1);

  /* pp is still free at this point, so reuse it as evaluation scratch.  */
  gp = pp;

  /* Compute as1 and asm1.  The helper returns a flag telling whether
     A(-1) came out negative; fold that into aflags as toom7_w3_neg.  */
  aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));

  /* Compute as2 and asm2.  Likewise, a negative A(-2) sets toom7_w1_neg.  */
  aflags = (enum toom7_flags) (aflags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp)));

  /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
     = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5
     i.e. A evaluated at 1/2, scaled by 2^5 (Horner on the reversed
     coefficients).  The running carry cy is doubled at each step to
     track the shifted-out high bits.  */

#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (ash, a1, a0, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
  if (s < n)
    {
      /* a5 is short: combine it over the low s limbs only, then shift
	 the remaining high part and propagate a5's carry into it.  */
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (ash, a5, ash, s);
      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
      MPN_INCR_U (ash + s, n+1-s, cy2);
    }
  else
    ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
#else
  /* Generic fallback: explicit shift-then-add per Horner step.  */
  cy = mpn_lshift (ash, a0, n, 1);
  cy += mpn_add_n (ash, ash, a1, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a2, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a3, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a4, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  ash[n] = cy + mpn_add (ash, ash, n, a5, s);
#endif

  /* Compute bs1 = b0 + b1 and bsm1 = |b0 - b1|; record the sign of
     b0 - b1 in bflags (toom7_w3_neg when negative).  */
  if (t == n)
    {
#if HAVE_NATIVE_mpn_add_n_sub_n
      if (mpn_cmp (b0, b1, n) < 0)
	{
	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
	  bflags = toom7_w3_neg;
	}
      else
	{
	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
	  bflags = (enum toom7_flags) 0;
	}
      /* add_n_sub_n packs the add carry in bit 1; the subtract cannot
	 borrow since we subtracted the smaller operand.  */
      bs1[n] = cy >> 1;
#else
      bs1[n] = mpn_add_n (bs1, b0, b1, n);
      if (mpn_cmp (b0, b1, n) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, n);
	  bflags = toom7_w3_neg;
	}
      else
	{
	  mpn_sub_n (bsm1, b0, b1, n);
	  bflags = (enum toom7_flags) 0;
	}
#endif
    }
  else
    {
      /* t < n: b1 is shorter, so b0 < b1 only if b0's high limbs are
	 all zero.  */
      bs1[n] = mpn_add (bs1, b0, n, b1, t);
      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, t);
	  MPN_ZERO (bsm1 + t, n - t);
	  bflags = toom7_w3_neg;
	}
      else
	{
	  mpn_sub (bsm1, b0, n, b1, t);
	  bflags = (enum toom7_flags) 0;
	}
    }

  /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
     bsm1 - b1 */
  mpn_add (bs2, bs1, n + 1, b1, t);
  if (bflags & toom7_w3_neg)
    {
      /* bsm1 holds b1 - b0, so bsm1 + b1 = 2 b1 - b0 = -(b0 - 2 b1):
	 B(-2) is negative too.  */
      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
    }
  else
    {
      /* bsm1 holds b0 - b1 >= 0; bsm2 = bsm1 - b1 may still go
	 negative, in which case compute b1 - bsm1 and flag it.  */
      /* FIXME: Simplify this logic? */
      if (t < n)
	{
	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
	    {
	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
	      MPN_ZERO (bsm2 + t, n + 1 - t);
	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
	    }
	  else
	    {
	      ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
	      bsm2[n] = 0;
	    }
	}
      else
	{
	  if (mpn_cmp (bsm1, b1, n) < 0)
	    {
	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
	    }
	  else
	    {
	      ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
	    }
	  bsm2[n] = 0;
	}
    }

  /* Compute bsh, recycling bs1. bsh=bs1+b0 = 2 b0 + b1;  */
  bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);

  /* High-limb bounds from the evaluation table in the file header.  */
  ASSERT (as1[n] <= 5);
  ASSERT (bs1[n] <= 1);
  ASSERT (asm1[n] <= 2);
  ASSERT (as2[n] <= 62);
  ASSERT (bs2[n] <= 2);
  ASSERT (asm2[n] <= 41);
  ASSERT (bsm2[n] <= 1);
  ASSERT (ash[n] <= 62);
  ASSERT (bsh[n] <= 2);

  /* v0, v1 and vinf go straight into the result area pp; the other
     four point values live in scratch.  */
#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 6 * n)			/* s+t */
#define v2    scratch				/* 2n+1 */
#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
  /* Total scratch need: 10*n+5 */

  /* Must be in allocation order, as they overwrite one limb beyond
   * 2n+1. */
  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */

  /* vm1, 2n+1 limbs.  Multiply the low n limbs, then add in the cross
     term asm1[n] * bsm1 by hand (asm1[n] <= 2, and bsm1 has no high
     limb).  */
  mpn_mul_n (vm1, asm1, bsm1, n);
  cy = 0;
  if (asm1[n] == 1)
    {
      cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
    }
  else if (asm1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
#else
      cy = mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
#endif
    }
  vm1[2 * n] = cy;

  /* v1, 2n+1 limbs.  Same idea, but both high limbs may be nonzero:
     add as1[n]*bs1 (as1[n] <= 5, with fast paths for 1 and 2) and then
     bs1[n]*as1.  */
  mpn_mul_n (v1, as1, bs1, n);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else if (as1[n] != 0)
    {
      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
    }
  else
    cy = 0;
  if (bs1[n] != 0)
    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
  v1[2 * n] = cy;

  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */

  /* vinf, s+t limbs; put the longer operand first as mpn_mul wants.  */
  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
  else        mpn_mul (vinf, b1, t, a5, s);

  /* Combine the seven point values into the final 7n-ish limb product.
     aflags ^ bflags gives the sign of each product of odd-point
     evaluations.  */
  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
			     vm2, vm1, v2, vh, s + t, scratch_out);

  TMP_FREE;
}