(root)/
gmp-6.3.0/
mpn/
generic/
toom4_sqr.c
       1  /* mpn_toom4_sqr -- Square {ap,an}.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2006-2010, 2013, 2021 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  /* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
      41  
      42    <-s--><--n--><--n--><--n-->
      43     ____ ______ ______ ______
      44    |_a3_|___a2_|___a1_|___a0_|
      45  
      46    v0  =   a0             ^2 #    A(0)^2
      47    v1  = ( a0+ a1+ a2+ a3)^2 #    A(1)^2   ah  <= 3
      48    vm1 = ( a0- a1+ a2- a3)^2 #   A(-1)^2  |ah| <= 1
      49    v2  = ( a0+2a1+4a2+8a3)^2 #    A(2)^2   ah  <= 14
      50    vm2 = ( a0-2a1+4a2-8a3)^2 #   A(-2)^2  -9<=ah<=4
      51    vh  = (8a0+4a1+2a2+ a3)^2 #  A(1/2)^2   ah  <= 14
      52    vinf=               a3 ^2 #  A(inf)^2
      53  */
      54  
      55  #if TUNE_PROGRAM_BUILD
      56  #define MAYBE_sqr_basecase 1
      57  #define MAYBE_sqr_toom2   1
      58  #define MAYBE_sqr_toom4   1
      59  #else
      60  #define MAYBE_sqr_basecase						\
      61    (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
      62  #define MAYBE_sqr_toom2							\
      63    (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
      64  #define MAYBE_sqr_toom4							\
      65    (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
      66  #endif
      67  
      68  #define TOOM4_SQR_REC(p, a, n, ws)					\
      69    do {									\
      70      if (MAYBE_sqr_basecase						\
      71  	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
      72        mpn_sqr_basecase (p, a, n);					\
      73      else if (MAYBE_sqr_toom2						\
      74  	     && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
      75        mpn_toom2_sqr (p, a, n, ws);					\
      76      else if (! MAYBE_sqr_toom4						\
      77  	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))		\
      78        mpn_toom3_sqr (p, a, n, ws);					\
      79      else								\
      80        mpn_toom4_sqr (p, a, n, ws);					\
      81    } while (0)
      82  
      83  void
      84  mpn_toom4_sqr (mp_ptr pp,
      85  	       mp_srcptr ap, mp_size_t an,
      86  	       mp_ptr scratch)
      87  {
      88    mp_size_t n, s;
      89    mp_limb_t cy;
      90  
      91  #define a0  ap
      92  #define a1  (ap + n)
      93  #define a2  (ap + 2*n)
      94  #define a3  (ap + 3*n)
      95  
      96    n = (an + 3) >> 2;
      97  
      98    s = an - 3 * n;
      99  
     100    ASSERT (0 < s && s <= n);
     101  
     102    /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
     103     * following limb, so these must be computed in order, and we need a
     104     * one limb gap to tp. */
     105  #define v0    pp				/* 2n */
     106  #define v1    (pp + 2 * n)			/* 2n+1 */
     107  #define vinf  (pp + 6 * n)			/* s+t */
     108  #define v2    scratch				/* 2n+1 */
     109  #define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
     110  #define vh    (scratch + 4 * n + 2)		/* 2n+1 */
     111  #define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
     112  #define tp (scratch + 8*n + 5)
     113  
     114    /* No overlap with v1 */
     115  #define apx   pp				/* n+1 */
     116  #define amx   (pp + 4*n + 2)			/* n+1 */
     117  
     118    /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
     119       gives roughly 32 n/3 + log term. */
     120  
     121    /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
     122    mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
     123  
     124    TOOM4_SQR_REC (v2, apx, n + 1, tp);	/* v2,  2n+1 limbs */
     125    TOOM4_SQR_REC (vm2, amx, n + 1, tp);	/* vm2,  2n+1 limbs */
     126  
     127    /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
     128  #if HAVE_NATIVE_mpn_addlsh1_n
     129    cy = mpn_addlsh1_n (apx, a1, a0, n);
     130    cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
     131    if (s < n)
     132      {
     133        mp_limb_t cy2;
     134        cy2 = mpn_addlsh1_n (apx, a3, apx, s);
     135        apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
     136        MPN_INCR_U (apx + s, n+1-s, cy2);
     137      }
     138    else
     139      apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
     140  #else
     141    cy = mpn_lshift (apx, a0, n, 1);
     142    cy += mpn_add_n (apx, apx, a1, n);
     143    cy = 2*cy + mpn_lshift (apx, apx, n, 1);
     144    cy += mpn_add_n (apx, apx, a2, n);
     145    cy = 2*cy + mpn_lshift (apx, apx, n, 1);
     146    apx[n] = cy + mpn_add (apx, apx, n, a3, s);
     147  #endif
     148  
     149    ASSERT (apx[n] < 15);
     150  
     151    TOOM4_SQR_REC (vh, apx, n + 1, tp);	/* vh,  2n+1 limbs */
     152  
     153    /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
     154    mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
     155  
     156    TOOM4_SQR_REC (v1, apx, n + 1, tp);	/* v1,  2n+1 limbs */
     157    vm1 [2 * n] = 0;
     158    TOOM4_SQR_REC (vm1, amx, n + amx[n], tp);	/* vm1,  2n+1 limbs */
     159  
     160    TOOM4_SQR_REC (v0, a0, n, tp);
     161    TOOM4_SQR_REC (vinf, a3, s, tp);	/* vinf, 2s limbs */
     162  
     163    mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
     164  }