(root)/
gmp-6.3.0/
mpn/
generic/
sqrlo_basecase.c
       1  /* mpn_sqrlo_basecase -- Internal routine to square a natural number
       2     of length n.
       3  
       4     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
       5     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
       6  
       7  
       8  Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
       9  2016 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  #include "gmp-impl.h"
      38  #include "longlong.h"
      39  
      40  #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
      41  #if HAVE_NATIVE_mpn_addmul_1
      42  #define SQRLO_SHORTCUT_MULTIPLICATIONS 0
      43  #else
      44  #define SQRLO_SHORTCUT_MULTIPLICATIONS 1
      45  #endif
      46  #endif
      47  
      48  #if HAVE_NATIVE_mpn_sqr_diagonal
      49  #define MPN_SQR_DIAGONAL(rp, up, n)					\
      50    mpn_sqr_diagonal (rp, up, n)
      51  #else
      52  #define MPN_SQR_DIAGONAL(rp, up, n)					\
      53    do {									\
      54      mp_size_t _i;							\
      55      for (_i = 0; _i < (n); _i++)					\
      56        {									\
      57  	mp_limb_t ul, lpl;						\
      58  	ul = (up)[_i];							\
      59  	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
      60  	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
      61        }									\
      62    } while (0)
      63  #endif
      64  
      65  #define MPN_SQRLO_DIAGONAL(rp, up, n)					\
      66    do {									\
      67      mp_size_t nhalf;							\
      68      nhalf = (n) >> 1;							\
      69      MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
      70      if (((n) & 1) != 0)							\
      71        {									\
      72  	mp_limb_t op;							\
      73  	op = (up)[nhalf];						\
      74  	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
      75        }									\
      76    } while (0)
      77  
      78  #if HAVE_NATIVE_mpn_addlsh1_n_ip1
      79  #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
      80    do {									\
      81      MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
      82      mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
      83    } while (0)
      84  #else
      85  #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
      86    do {									\
      87      MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
      88      mpn_lshift ((tp), (tp), (n) - 1, 1);				\
      89      mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
      90    } while (0)
      91  #endif
      92  
      93  /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
      94  #define SQRLO_BASECASE_ALLOC						\
      95    (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
      96  
      97  /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
      98  #ifndef SQRLO_SPECIAL_CASES
      99  #define SQRLO_SPECIAL_CASES 2
     100  #endif
     101  
     102  #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
     103  #define MAYBE_special_cases 1
     104  #else
     105  #define MAYBE_special_cases \
     106    ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
     107  #endif
     108  
     109  void
     110  mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     111  {
     112    mp_limb_t ul;
     113  
     114    ASSERT (n >= 1);
     115    ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
     116  
     117    ul = up[0];
     118  
     119    if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
     120      {
     121  #if SQRLO_SPECIAL_CASES == 1
     122        rp[0] = (ul * ul) & GMP_NUMB_MASK;
     123  #else
     124        if (n == 1)
     125  	rp[0] = (ul * ul) & GMP_NUMB_MASK;
     126        else
     127  	{
     128  	  mp_limb_t hi, lo, ul1;
     129  	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
     130  	  rp[0] = lo >> GMP_NAIL_BITS;
     131  	  ul1 = up[1];
     132  #if SQRLO_SPECIAL_CASES == 2
     133  	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
     134  #else
     135  	  if (n == 2)
     136  	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
     137  	  else
     138  	    {
     139  	      mp_limb_t hi1;
     140  #if GMP_NAIL_BITS != 0
     141  	      ul <<= 1;
     142  #endif
     143  	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
     144  	      hi1 += ul * up[2];
     145  #if GMP_NAIL_BITS == 0
     146  	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
     147  	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
     148  #else
     149  	      hi += lo >> GMP_NAIL_BITS;
     150  	      rp[1] = hi & GMP_NUMB_MASK;
     151  	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
     152  #endif
     153  	    }
     154  #endif
     155  	}
     156  #endif
     157      }
     158    else
     159      {
     160        mp_limb_t tp[SQRLO_BASECASE_ALLOC];
     161        mp_size_t i;
     162  
     163        /* must fit n-1 limbs in tp */
     164        ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
     165  
     166        --n;
     167  #if SQRLO_SHORTCUT_MULTIPLICATIONS
     168        {
     169  	mp_limb_t cy;
     170  
     171  	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
     172  	for (i = 1; 2 * i + 1 < n; ++i)
     173  	  {
     174  	    ul = up[i];
     175  	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
     176  	  }
     177  	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
     178        }
     179  #else
     180        mpn_mul_1 (tp, up + 1, n, ul);
     181        for (i = 1; 2 * i < n; ++i)
     182  	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
     183  #endif
     184  
     185        MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
     186      }
     187  }
     188  #undef SQRLO_SPECIAL_CASES
     189  #undef MAYBE_special_cases
     190  #undef SQRLO_BASECASE_ALLOC
     191  #undef SQRLO_SHORTCUT_MULTIPLICATIONS
     192  #undef MPN_SQR_DIAGONAL
     193  #undef MPN_SQRLO_DIAGONAL
     194  #undef MPN_SQRLO_DIAG_ADDLSH1