(root)/
gmp-6.3.0/
mpn/
generic/
toom6_sqr.c
       1  /* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  
      41  #if GMP_NUMB_BITS < 21
      42  #error Not implemented.
      43  #endif
      44  
      45  
      46  #if TUNE_PROGRAM_BUILD
      47  #define MAYBE_sqr_basecase 1
      48  #define MAYBE_sqr_above_basecase   1
      49  #define MAYBE_sqr_toom2   1
      50  #define MAYBE_sqr_above_toom2   1
      51  #define MAYBE_sqr_toom3   1
      52  #define MAYBE_sqr_above_toom3   1
      53  #define MAYBE_sqr_above_toom4   1
      54  #else
      55  #ifdef  SQR_TOOM8_THRESHOLD
      56  #define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
      57  #else
      58  #define SQR_TOOM6_MAX					\
      59    ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
      60     ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
      61     : MP_SIZE_T_MAX )
      62  #endif
      63  #define MAYBE_sqr_basecase					\
      64    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
      65  #define MAYBE_sqr_above_basecase				\
      66    (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
      67  #define MAYBE_sqr_toom2						\
      68    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
      69  #define MAYBE_sqr_above_toom2					\
      70    (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
      71  #define MAYBE_sqr_toom3						\
      72    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
      73  #define MAYBE_sqr_above_toom3					\
      74    (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
      75  #define MAYBE_sqr_above_toom4					\
      76    (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
      77  #endif
      78  
      79  #define TOOM6_SQR_REC(p, a, n, ws)					\
      80    do {									\
      81      if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
      82  	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
      83        mpn_sqr_basecase (p, a, n);					\
      84      else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
      85  	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
      86        mpn_toom2_sqr (p, a, n, ws);					\
      87      else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
      88  	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
      89        mpn_toom3_sqr (p, a, n, ws);					\
      90      else if (! MAYBE_sqr_above_toom4					\
      91  	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
      92        mpn_toom4_sqr (p, a, n, ws);					\
      93      else								\
      94        mpn_toom6_sqr (p, a, n, ws);					\
      95    } while (0)
      96  
      97  void
      98  mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
      99  {
     100    mp_size_t n, s;
     101  
     102    /***************************** decomposition *******************************/
     103  
     104    ASSERT( an >= 18 );
     105  
     106    n = 1 + (an - 1) / (size_t) 6;
     107  
     108    s = an - 5 * n;
     109  
     110    ASSERT (0 < s && s <= n);
     111  
     112  #define   r4    (pp + 3 * n)			/* 3n+1 */
     113  #define   r2    (pp + 7 * n)			/* 3n+1 */
     114  #define   r0    (pp +11 * n)			/* s+t <= 2*n */
     115  #define   r5    (scratch)			/* 3n+1 */
     116  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
     117  #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
     118  #define   v0    (pp + 7 * n)			/* n+1 */
     119  #define   v2    (pp + 9 * n+2)			/* n+1 */
     120  #define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
     121  
     122    /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
     123       need all of them, when DO_mpn_sublsh_n usea a scratch  */
     124  /*   if (scratch== NULL) */
     125  /*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
     126  
     127    /********************** evaluation and recursive calls *********************/
     128    /* $\pm1/2$ */
     129    mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
     130    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
     131    TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
     132    mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
     133  
     134    /* $\pm1$ */
     135    mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
     136    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
     137    TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
     138    mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
     139  
     140    /* $\pm4$ */
     141    mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
     142    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
     143    TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
     144    mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
     145  
     146    /* $\pm1/4$ */
     147    mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
     148    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
     149    TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
     150    mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
     151  
     152    /* $\pm2$ */
     153    mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
     154    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
     155    TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
     156    mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
     157  
     158  #undef v0
     159  #undef v2
     160  
     161    /* A(0)*B(0) */
     162    TOOM6_SQR_REC(pp, ap, n, wse);
     163  
     164    mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
     165  
     166  #undef r0
     167  #undef r1
     168  #undef r2
     169  #undef r3
     170  #undef r4
     171  #undef r5
     172  
     173  }
     174  #undef TOOM6_SQR_REC
     175  #undef MAYBE_sqr_basecase
     176  #undef MAYBE_sqr_above_basecase
     177  #undef MAYBE_sqr_toom2
     178  #undef MAYBE_sqr_above_toom2
     179  #undef MAYBE_sqr_toom3
     180  #undef MAYBE_sqr_above_toom3
     181  #undef MAYBE_sqr_above_toom4