(root)/
gmp-6.3.0/
mpn/
generic/
toom6h_mul.c
       1  /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  
      41  #if GMP_NUMB_BITS < 21
      42  #error Not implemented.
      43  #endif
      44  
      45  #if TUNE_PROGRAM_BUILD
      46  #define MAYBE_mul_basecase 1
      47  #define MAYBE_mul_toom22   1
      48  #define MAYBE_mul_toom33   1
      49  #define MAYBE_mul_toom6h   1
      50  #else
      51  #define MAYBE_mul_basecase						\
      52    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
      53  #define MAYBE_mul_toom22						\
      54    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
      55  #define MAYBE_mul_toom33						\
      56    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
      57  #define MAYBE_mul_toom6h						\
      58    (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
      59  #endif
      60  
      61  #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
      62    do {									\
      63      if (MAYBE_mul_basecase						\
      64  	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
      65        mpn_mul_basecase (p, a, n, b, n);					\
      66        if (f)								\
      67  	mpn_mul_basecase (p2, a2, n, b2, n);				\
      68      } else if (MAYBE_mul_toom22						\
      69  	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
      70        mpn_toom22_mul (p, a, n, b, n, ws);				\
      71        if (f)								\
      72  	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
      73      } else if (MAYBE_mul_toom33						\
      74  	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
      75        mpn_toom33_mul (p, a, n, b, n, ws);				\
      76        if (f)								\
      77  	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
      78      } else if (! MAYBE_mul_toom6h					\
      79  	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
      80        mpn_toom44_mul (p, a, n, b, n, ws);				\
      81        if (f)								\
      82  	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
      83      } else {								\
      84        mpn_toom6h_mul (p, a, n, b, n, ws);				\
      85        if (f)								\
      86  	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
      87      }									\
      88    } while (0)
      89  
      90  #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
      91    do { mpn_mul (p, a, na, b, nb);			\
      92    } while (0)
      93  
      94  /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
      95     With: an >= bn >= 46, an*6 <  bn * 17.
      96     It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
      97  
      98     Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
      99  */
     100  /* Estimate on needed scratch:
     101     S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
     102     since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
     103   */
     104  
     105  void
     106  mpn_toom6h_mul   (mp_ptr pp,
     107  		  mp_srcptr ap, mp_size_t an,
     108  		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
     109  {
     110    mp_size_t n, s, t;
     111    int p, q, half;
     112    int sign;
     113  
     114    /***************************** decomposition *******************************/
     115  
     116    ASSERT (an >= bn);
     117    /* Can not handle too much unbalancement */
     118    ASSERT (bn >= 42);
     119    /* Can not handle too much unbalancement */
     120    ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
     121  
     122    /* Limit num/den is a rational number between
     123       (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
     124  #define LIMIT_numerator (18)
     125  #define LIMIT_denominat (17)
     126  
     127    if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
     128      {
     129        n = 1 + (an - 1) / (size_t) 6;
     130        p = q = 5;
     131        half = 0;
     132  
     133        s = an - 5 * n;
     134        t = bn - 5 * n;
     135      }
     136    else {
     137      if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
     138        { p = 7; q = 6; }
     139      else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
     140        { p = 7; q = 5; }
     141      else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
     142        { p = 8; q = 5; }
     143      else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
     144        { p = 8; q = 4; }
     145      else
     146        { p = 9; q = 4; }
     147  
     148      half = (p ^ q) & 1;
     149      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
     150      p--; q--;
     151  
     152      s = an - p * n;
     153      t = bn - q * n;
     154  
     155      /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
     156      if (half) { /* Recover from badly chosen splitting */
     157        if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
     158        else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
     159      }
     160    }
     161  #undef LIMIT_numerator
     162  #undef LIMIT_denominat
     163  
     164    ASSERT (0 < s && s <= n);
     165    ASSERT (0 < t && t <= n);
     166    ASSERT (half || s + t > 3);
     167    ASSERT (n > 2);
     168  
     169  #define   r4    (pp + 3 * n)			/* 3n+1 */
     170  #define   r2    (pp + 7 * n)			/* 3n+1 */
     171  #define   r0    (pp +11 * n)			/* s+t <= 2*n */
     172  #define   r5    (scratch)			/* 3n+1 */
     173  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
     174  #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
     175  #define   v0    (pp + 7 * n)			/* n+1 */
     176  #define   v1    (pp + 8 * n+1)			/* n+1 */
     177  #define   v2    (pp + 9 * n+2)			/* n+1 */
     178  #define   v3    (scratch + 9 * n + 3)		/* n+1 */
     179  #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
     180  #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
     181  
     182    /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
     183       need all of them  */
     184  /*   if (scratch == NULL) */
     185  /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
     186    ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
     187    ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
     188  
     189    /********************** evaluation and recursive calls *********************/
     190    /* $\pm1/2$ */
     191    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
     192  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
     193    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
     194    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
     195    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
     196  
     197    /* $\pm1$ */
     198    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
     199    if (UNLIKELY (q == 3))
     200      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
     201    else
     202      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
     203    /* A(-1)*B(-1) */ /* A(1)*B(1) */
     204    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
     205    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
     206  
     207    /* $\pm4$ */
     208    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
     209  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
     210    /* A(-4)*B(-4) */
     211    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
     212    mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
     213  
     214    /* $\pm1/4$ */
     215    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
     216  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
     217    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
     218    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
     219    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
     220  
     221    /* $\pm2$ */
     222    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
     223  	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
     224    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
     225    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
     226    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
     227  
     228  #undef v0
     229  #undef v1
     230  #undef v2
     231  #undef v3
     232  #undef wse
     233  
     234    /* A(0)*B(0) */
     235    TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
     236  
     237    /* Infinity */
     238    if (UNLIKELY (half != 0)) {
     239      if (s > t) {
     240        TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
     241      } else {
     242        TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
     243      };
     244    };
     245  
     246    mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
     247  
     248  #undef r0
     249  #undef r1
     250  #undef r2
     251  #undef r3
     252  #undef r4
     253  #undef r5
     254  #undef wsi
     255  }
     256  
     257  #undef TOOM6H_MUL_N_REC
     258  #undef TOOM6H_MUL_REC
     259  #undef MAYBE_mul_basecase
     260  #undef MAYBE_mul_toom22
     261  #undef MAYBE_mul_toom33
     262  #undef MAYBE_mul_toom6h