(root)/
gmp-6.3.0/
mpn/
generic/
toom8h_mul.c
       1  /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  
      41  #if GMP_NUMB_BITS < 29
      42  #error Not implemented.
      43  #endif
      44  
      45  #if GMP_NUMB_BITS < 43
      46  #define BIT_CORRECTION 1
      47  #define CORRECTION_BITS GMP_NUMB_BITS
      48  #else
      49  #define BIT_CORRECTION 0
      50  #define CORRECTION_BITS 0
      51  #endif
      52  
      53  
      54  #if TUNE_PROGRAM_BUILD
      55  #define MAYBE_mul_basecase 1
      56  #define MAYBE_mul_toom22   1
      57  #define MAYBE_mul_toom33   1
      58  #define MAYBE_mul_toom44   1
      59  #define MAYBE_mul_toom8h   1
      60  #else
      61  #define MAYBE_mul_basecase						\
      62    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
      63  #define MAYBE_mul_toom22						\
      64    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
      65  #define MAYBE_mul_toom33						\
      66    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
      67  #define MAYBE_mul_toom44						\
      68    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
      69  #define MAYBE_mul_toom8h						\
      70    (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
      71  #endif
      72  
      73  #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
      74    do {									\
      75      if (MAYBE_mul_basecase						\
      76  	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
      77        mpn_mul_basecase (p, a, n, b, n);					\
      78        if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
      79      } else if (MAYBE_mul_toom22						\
      80  	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
      81        mpn_toom22_mul (p, a, n, b, n, ws);				\
      82        if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
      83      } else if (MAYBE_mul_toom33						\
      84  	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
      85        mpn_toom33_mul (p, a, n, b, n, ws);				\
      86        if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
      87      } else if (MAYBE_mul_toom44						\
      88  	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
      89        mpn_toom44_mul (p, a, n, b, n, ws);				\
      90        if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
      91      } else if (! MAYBE_mul_toom8h					\
      92  	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
      93        mpn_toom6h_mul (p, a, n, b, n, ws);				\
      94        if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
      95      } else {								\
      96        mpn_toom8h_mul (p, a, n, b, n, ws);				\
      97        if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
      98      }									\
      99    } while (0)
     100  
     101  #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
     102    do { mpn_mul (p, a, na, b, nb); } while (0)
     103  
     104  /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
     105     With: an >= bn >= 86, an*5 <  bn * 11.
     106     It _may_ work with bn<=?? and bn*?? < an*? < bn*??
     107  
     108     Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
     109  */
     110  /* Estimate on needed scratch:
     111     S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
     112     since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
     113   */
     114  
     115  void
     116  mpn_toom8h_mul   (mp_ptr pp,
     117  		  mp_srcptr ap, mp_size_t an,
     118  		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
     119  {
     120    mp_size_t n, s, t;
     121    int p, q, half;
     122    int sign;
     123  
     124    /***************************** decomposition *******************************/
     125  
     126    ASSERT (an >= bn);
     127    /* Can not handle too small operands */
     128    ASSERT (bn >= 86);
     129    /* Can not handle too much unbalancement */
     130    ASSERT (an <= bn*4);
     131    ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
     132    ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
     133    ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
     134  
     135    /* Limit num/den is a rational number between
     136       (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
     137  #define LIMIT_numerator (21)
     138  #define LIMIT_denominat (20)
     139  
     140    if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
     141      {
     142        half = 0;
     143        n = 1 + ((an - 1)>>3);
     144        p = q = 7;
     145        s = an - 7 * n;
     146        t = bn - 7 * n;
     147      }
     148    else
     149      {
     150        if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
     151  	{ p = 9; q = 8; }
     152        else if (GMP_NUMB_BITS <= 9*3 ||
     153  	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
     154  	{ p = 9; q = 7; }
     155        else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
     156  	{ p =10; q = 7; }
     157        else if (GMP_NUMB_BITS <= 10*3 ||
     158  	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
     159  	{ p =10; q = 6; }
     160        else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
     161  	{ p =11; q = 6; }
     162        else if (GMP_NUMB_BITS <= 11*3 ||
     163  	       an * 4 < 9 * bn)
     164  	{ p =11; q = 5; }
     165        else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
     166  	{ p =12; q = 5; }
     167        else if (GMP_NUMB_BITS <= 12*3 ||
     168  	       an * 9 < 28 * bn )  /* is 4*... <12*... */
     169  	{ p =12; q = 4; }
     170        else
     171  	{ p =13; q = 4; }
     172  
     173        half = (p+q)&1;
     174        n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
     175        p--; q--;
     176  
     177        s = an - p * n;
     178        t = bn - q * n;
     179  
     180        if(half) { /* Recover from badly chosen splitting */
     181  	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
     182  	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
     183        }
     184      }
     185  #undef LIMIT_numerator
     186  #undef LIMIT_denominat
     187  
     188    ASSERT (0 < s && s <= n);
     189    ASSERT (0 < t && t <= n);
     190    ASSERT (half || s + t > 3);
     191    ASSERT (n > 2);
     192  
     193  #define   r6    (pp + 3 * n)			/* 3n+1 */
     194  #define   r4    (pp + 7 * n)			/* 3n+1 */
     195  #define   r2    (pp +11 * n)			/* 3n+1 */
     196  #define   r0    (pp +15 * n)			/* s+t <= 2*n */
     197  #define   r7    (scratch)			/* 3n+1 */
     198  #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
     199  #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
     200  #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
     201  #define   v0    (pp +11 * n)			/* n+1 */
     202  #define   v1    (pp +12 * n+1)			/* n+1 */
     203  #define   v2    (pp +13 * n+2)			/* n+1 */
     204  #define   v3    (scratch +12 * n + 4)		/* n+1 */
     205  #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
     206  #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
     207  
     208    /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
     209       need all of them  */
     210  /*   if (scratch == NULL) */
     211  /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
     212    ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
     213    ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
     214  
     215    /********************** evaluation and recursive calls *********************/
     216  
     217    /* $\pm1/8$ */
     218    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
     219  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
     220    /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
     221    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
     222    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
     223  
     224    /* $\pm1/4$ */
     225    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
     226  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
     227    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
     228    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
     229    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
     230  
     231    /* $\pm2$ */
     232    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
     233  	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
     234    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
     235    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
     236    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
     237  
     238    /* $\pm8$ */
     239    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
     240  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
     241    /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
     242    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
     243    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
     244  
     245    /* $\pm1/2$ */
     246    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
     247  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
     248    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
     249    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
     250    mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
     251  
     252    /* $\pm1$ */
     253    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
     254    if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
     255      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
     256    else
     257      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
     258    /* A(-1)*B(-1) */ /* A(1)*B(1) */
     259    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
     260    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
     261  
     262    /* $\pm4$ */
     263    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
     264  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
     265    /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
     266    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
     267    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
     268  
     269  #undef v0
     270  #undef v1
     271  #undef v2
     272  #undef v3
     273  #undef wse
     274  
     275    /* A(0)*B(0) */
     276    TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
     277  
     278    /* Infinity */
     279    if (UNLIKELY (half != 0)) {
     280      if (s > t) {
     281        TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
     282      } else {
     283        TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
     284      };
     285    };
     286  
     287    mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
     288  
     289  #undef r0
     290  #undef r1
     291  #undef r2
     292  #undef r3
     293  #undef r4
     294  #undef r5
     295  #undef r6
     296  #undef wsi
     297  }
     298  
     299  #undef TOOM8H_MUL_N_REC
     300  #undef TOOM8H_MUL_REC
     301  #undef MAYBE_mul_basecase
     302  #undef MAYBE_mul_toom22
     303  #undef MAYBE_mul_toom33
     304  #undef MAYBE_mul_toom44
     305  #undef MAYBE_mul_toom8h