(root)/
gmp-6.3.0/
mpn/
generic/
toom2_sqr.c
       1  /* mpn_toom2_sqr -- Square {ap,an}.
       2  
       3     Contributed to the GNU project by Torbjorn Granlund.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  /* Evaluate in: -1, 0, +inf
      41  
      42    <-s--><--n-->
      43     ____ ______
      44    |_a1_|___a0_|
      45  
      46    v0  =  a0     ^2  #   A(0)^2
      47    vm1 = (a0- a1)^2  #  A(-1)^2
      48    vinf=      a1 ^2  # A(inf)^2
      49  */
      50  
      51  #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
      52  #define MAYBE_sqr_toom2   1
      53  #else
      54  #define MAYBE_sqr_toom2							\
      55    (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
      56  #endif
      57  
      58  #define TOOM2_SQR_REC(p, a, n, ws)					\
      59    do {									\
      60      if (! MAYBE_sqr_toom2						\
      61  	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
      62        mpn_sqr_basecase (p, a, n);					\
      63      else								\
      64        mpn_toom2_sqr (p, a, n, ws);					\
      65    } while (0)
      66  
      67  void
      68  mpn_toom2_sqr (mp_ptr pp,
      69  	       mp_srcptr ap, mp_size_t an,
      70  	       mp_ptr scratch)
      71  {
      72    const int __gmpn_cpuvec_initialized = 1;
      73    mp_size_t n, s;
      74    mp_limb_t cy, cy2;
      75    mp_ptr asm1;
      76  
      77  #define a0  ap
      78  #define a1  (ap + n)
      79  
      80    s = an >> 1;
      81    n = an - s;
      82  
      83    ASSERT (0 < s && s <= n && (n - s) == (an & 1));
      84  
      85    asm1 = pp;
      86  
      87    /* Compute asm1.  */
      88    if ((an & 1) == 0) /* s == n */
      89      {
      90        if (mpn_cmp (a0, a1, n) < 0)
      91  	{
      92  	  mpn_sub_n (asm1, a1, a0, n);
      93  	}
      94        else
      95  	{
      96  	  mpn_sub_n (asm1, a0, a1, n);
      97  	}
      98      }
      99    else /* n - s == 1 */
     100      {
     101        if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
     102  	{
     103  	  mpn_sub_n (asm1, a1, a0, s);
     104  	  asm1[s] = 0;
     105  	}
     106        else
     107  	{
     108  	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
     109  	}
     110      }
     111  
     112  #define v0	pp				/* 2n */
     113  #define vinf	(pp + 2 * n)			/* s+s */
     114  #define vm1	scratch				/* 2n */
     115  #define scratch_out	scratch + 2 * n
     116  
     117    /* vm1, 2n limbs */
     118    TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
     119  
     120    /* vinf, s+s limbs */
     121    TOOM2_SQR_REC (vinf, a1, s, scratch_out);
     122  
     123    /* v0, 2n limbs */
     124    TOOM2_SQR_REC (v0, ap, n, scratch_out);
     125  
     126    /* H(v0) + L(vinf) */
     127    cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
     128  
     129    /* L(v0) + H(v0) */
     130    cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
     131  
     132    /* L(vinf) + H(vinf) */
     133    cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
     134  
     135    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
     136  
     137    ASSERT (cy + 1 <= 3);
     138    ASSERT (cy2 <= 2);
     139  
     140    if (LIKELY (cy <= 2)) {
     141      MPN_INCR_U (pp + 2 * n, s + s, cy2);
     142      MPN_INCR_U (pp + 3 * n, s + s - n, cy);
     143    } else { /* cy is negative */
     144      /* The total contribution of v0+vinf-vm1 can not be negative. */
     145  #if WANT_ASSERT
     146      /* The borrow in cy stops the propagation of the carry cy2, */
     147      ASSERT (cy2 == 1);
     148      cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
     149      ASSERT (cy == 0);
     150  #else
     151      /* we simply fill the area with zeros. */
     152      MPN_FILL (pp + 2 * n, n, 0);
     153  #endif
     154    }
     155  }