(root)/
gmp-6.3.0/
mpn/
generic/
toom8_sqr.c
       1  /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
       2  
       3     Contributed to the GNU project by Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2009, 2012 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  #if GMP_NUMB_BITS < 29
      41  #error Not implemented.
      42  #endif
      43  
      44  #if GMP_NUMB_BITS < 43
      45  #define BIT_CORRECTION 1
      46  #define CORRECTION_BITS GMP_NUMB_BITS
      47  #else
      48  #define BIT_CORRECTION 0
      49  #define CORRECTION_BITS 0
      50  #endif
      51  
      52  #ifndef SQR_TOOM8_THRESHOLD
      53  #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
      54  #endif
      55  
      56  #ifndef SQR_TOOM6_THRESHOLD
      57  #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
      58  #endif
      59  
      60  #if TUNE_PROGRAM_BUILD
      61  #define MAYBE_sqr_basecase 1
      62  #define MAYBE_sqr_above_basecase   1
      63  #define MAYBE_sqr_toom2   1
      64  #define MAYBE_sqr_above_toom2   1
      65  #define MAYBE_sqr_toom3   1
      66  #define MAYBE_sqr_above_toom3   1
      67  #define MAYBE_sqr_toom4   1
      68  #define MAYBE_sqr_above_toom4   1
      69  #define MAYBE_sqr_above_toom6   1
      70  #else
      71  #define SQR_TOOM8_MAX					\
      72    ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
      73     ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
      74     : MP_SIZE_T_MAX )
      75  #define MAYBE_sqr_basecase					\
      76    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
      77  #define MAYBE_sqr_above_basecase				\
      78    (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
      79  #define MAYBE_sqr_toom2						\
      80    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
      81  #define MAYBE_sqr_above_toom2					\
      82    (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
      83  #define MAYBE_sqr_toom3						\
      84    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
      85  #define MAYBE_sqr_above_toom3					\
      86    (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
      87  #define MAYBE_sqr_toom4						\
      88    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
      89  #define MAYBE_sqr_above_toom4					\
      90    (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
      91  #define MAYBE_sqr_above_toom6					\
      92    (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
      93  #endif
      94  
      95  #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
      96    do {									\
      97      if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
      98  	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
      99        mpn_sqr_basecase (p, a, n);					\
     100        if (f) mpn_sqr_basecase (p2, a2, n);				\
     101      } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
     102  	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
     103        mpn_toom2_sqr (p, a, n, ws);					\
     104        if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
     105      } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
     106  	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
     107        mpn_toom3_sqr (p, a, n, ws);					\
     108        if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
     109      } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
     110  	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
     111        mpn_toom4_sqr (p, a, n, ws);					\
     112        if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
     113      } else if (! MAYBE_sqr_above_toom6					\
     114  	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
     115        mpn_toom6_sqr (p, a, n, ws);					\
     116        if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
     117      } else {								\
     118        mpn_toom8_sqr (p, a, n, ws);					\
     119        if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
     120      }									\
     121    } while (0)
     122  
     123  void
     124  mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
     125  {
     126    mp_size_t n, s;
     127  
     128    /***************************** decomposition *******************************/
     129  
     130    ASSERT ( an >= 40 );
     131  
     132    n = 1 + ((an - 1)>>3);
     133  
     134    s = an - 7 * n;
     135  
     136    ASSERT (0 < s && s <= n);
     137    ASSERT ( s + s > 3 );
     138  
     139  #define   r6    (pp + 3 * n)			/* 3n+1 */
     140  #define   r4    (pp + 7 * n)			/* 3n+1 */
     141  #define   r2    (pp +11 * n)			/* 3n+1 */
     142  #define   r0    (pp +15 * n)			/* s+t <= 2*n */
     143  #define   r7    (scratch)			/* 3n+1 */
     144  #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
     145  #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
     146  #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
     147  #define   v0    (pp +11 * n)			/* n+1 */
     148  #define   v2    (pp +13 * n+2)			/* n+1 */
     149  #define   wse   (scratch +12 * n + 4)		/* 3n+1 */
     150  
     151    /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
     152       need all of them, when DO_mpn_sublsh_n usea a scratch  */
     153  /*   if (scratch == NULL) */
     154  /*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
     155  
     156    /********************** evaluation and recursive calls *********************/
     157    /* $\pm1/8$ */
     158    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
     159    /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
     160    TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
     161    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
     162  
     163    /* $\pm1/4$ */
     164    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
     165    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
     166    TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
     167    mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
     168  
     169    /* $\pm2$ */
     170    mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
     171    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
     172    TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
     173    mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
     174  
     175    /* $\pm8$ */
     176    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
     177    /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
     178    TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
     179    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
     180  
     181    /* $\pm1/2$ */
     182    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
     183    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
     184    TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
     185    mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
     186  
     187    /* $\pm1$ */
     188    mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
     189    /* A(-1)*B(-1) */ /* A(1)*B(1) */
     190    TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
     191    mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
     192  
     193    /* $\pm4$ */
     194    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
     195    /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
     196    TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
     197    mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
     198  
     199  #undef v0
     200  #undef v2
     201  
     202    /* A(0)*B(0) */
     203    TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
     204  
     205    mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
     206  
     207  #undef r0
     208  #undef r1
     209  #undef r2
     210  #undef r3
     211  #undef r4
     212  #undef r5
     213  #undef r6
     214  #undef wse
     215  
     216  }
     217  
     218  #undef TOOM8_SQR_REC
     219  #undef MAYBE_sqr_basecase
     220  #undef MAYBE_sqr_above_basecase
     221  #undef MAYBE_sqr_toom2
     222  #undef MAYBE_sqr_above_toom2
     223  #undef MAYBE_sqr_toom3
     224  #undef MAYBE_sqr_above_toom3
     225  #undef MAYBE_sqr_above_toom4