(root)/
gmp-6.3.0/
mpz/
oddfac_1.c
       1  /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
       2  
       3  Contributed to the GNU project by Marco Bodrato.
       4  
       5  THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
       6  IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
       7  IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
       8  DISAPPEAR IN A FUTURE GNU MP RELEASE.
       9  
      10  Copyright 2010-2012, 2015-2017, 2020, 2021 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  #include "gmp-impl.h"
      39  #include "longlong.h"
      40  
      41  /* TODO:
      42     - split this file in smaller parts with functions that can be recycled for different computations.
      43   */
      44  
      45  /**************************************************************/
      46  /* Section macros: common macros, for mswing/fac/bin (&sieve) */
      47  /**************************************************************/
      48  
      49  #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
      50    if ((PR) > (MAX_PR)) {					\
      51      (VEC)[(I)++] = (PR);					\
      52      (PR) = 1;							\
      53    }
      54  
      55  #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
      56    do {								\
      57      if ((PR) > (MAX_PR)) {					\
      58        (VEC)[(I)++] = (PR);					\
      59        (PR) = (P);						\
      60      } else							\
      61        (PR) *= (P);						\
      62    } while (0)
      63  
      64  #define LOOP_ON_SIEVE_CONTINUE(prime,end)			\
      65      __max_i = (end);						\
      66  								\
      67      do {							\
      68        ++__i;							\
      69        if ((*__sieve & __mask) == 0)				\
      70  	{							\
      71  	  mp_limb_t prime;					\
      72  	  prime = id_to_n(__i)
      73  
      74  #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
      75    do {								\
      76      mp_limb_t __mask, *__sieve, __max_i, __i;			\
      77  								\
      78      __i = (start)-(off);					\
      79      __sieve = (sieve) + __i / GMP_LIMB_BITS;			\
      80      __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
      81      __i += (off);						\
      82  								\
      83      LOOP_ON_SIEVE_CONTINUE(prime,end)
      84  
      85  #define LOOP_ON_SIEVE_STOP					\
      86  	}							\
      87        __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
      88        __sieve += __mask & 1;					\
      89      }  while (__i <= __max_i)
      90  
      91  #define LOOP_ON_SIEVE_END					\
      92      LOOP_ON_SIEVE_STOP;						\
      93    } while (0)
      94  
      95  /*********************************************************/
      96  /* Section sieve: sieving functions and tools for primes */
      97  /*********************************************************/
      98  
      99  #if WANT_ASSERT
     100  static mp_limb_t
     101  bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
     102  #endif
     103  
     104  /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
     105  static mp_limb_t
     106  id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
     107  
     108  /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
     109  static mp_limb_t
     110  n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
     111  
     112  #if WANT_ASSERT
     113  static mp_size_t
     114  primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
     115  #endif
     116  
     117  /*********************************************************/
     118  /* Section mswing: 2-multiswing factorial                */
     119  /*********************************************************/
     120  
     121  /* Returns an approximation of the sqare root of x.
     122   * It gives:
     123   *   limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2
     124   * or
     125   *   x <= limb_apprsqrt (x) ^ 2 <= x * 9/8
     126   */
     127  static mp_limb_t
     128  limb_apprsqrt (mp_limb_t x)
     129  {
     130    int s;
     131  
     132    ASSERT (x > 2);
     133    count_leading_zeros (s, x);
     134    s = (GMP_LIMB_BITS - s) >> 1;
     135    return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s));
     136  }
     137  
     138  #if 0
     139  /* A count-then-exponentiate variant for SWING_A_PRIME */
     140  #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)		\
     141    do {							\
     142      mp_limb_t __q, __prime;				\
     143      int __exp;						\
     144      __prime = (P);					\
     145      __exp = 0;						\
     146      __q = (N);						\
     147      do {						\
     148        __q /= __prime;					\
     149        __exp += __q & 1;					\
     150      } while (__q >= __prime);				\
     151      if (__exp) { /* Store $prime^{exp}$ */		\
     152        for (__q = __prime; --__exp; __q *= __prime);	\
     153        FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);	\
     154      };							\
     155    } while (0)
     156  #else
     157  #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
     158    do {						\
     159      mp_limb_t __q, __prime;			\
     160      __prime = (P);				\
     161      FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
     162      __q = (N);					\
     163      do {					\
     164        __q /= __prime;				\
     165        if ((__q & 1) != 0) (PR) *= __prime;	\
     166      } while (__q >= __prime);			\
     167    } while (0)
     168  #endif
     169  
     170  #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
     171    do {							\
     172      mp_limb_t __prime;					\
     173      __prime = (P);					\
     174      if ((((N) / __prime) & 1) != 0)			\
     175        FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
     176    } while (0)
     177  
     178  /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
     179     factorial of the parameter n.  The result x is an odd positive
     180     integer so that multiswing(n,2) = x 2^a.
     181  
     182     Uses the algorithm described by Peter Luschny in "Divide, Swing and
     183     Conquer the Factorial!".
     184  
     185     The pointer sieve points to primesieve_size(n) limbs containing a
     186     bit-array where primes are marked as 0.
     187     Enough (FIXME: explain :-) limbs must be pointed by factors.
     188   */
     189  
     190  static void
     191  mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
     192  {
     193    mp_limb_t prod, max_prod;
     194    mp_size_t j;
     195  
     196    ASSERT (n > 25);
     197  
     198    j = 0;
     199    prod  = -(n & 1);
     200    n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
     201  
     202    prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
     203    max_prod = GMP_NUMB_MAX / (n-1);
     204  
     205    /* Handle prime = 3 separately. */
     206    SWING_A_PRIME (3, n, prod, max_prod, factors, j);
     207  
     208    /* Swing primes from 5 to n/3 */
     209    {
     210      mp_limb_t s, l_max_prod;
     211  
     212      s = limb_apprsqrt(n);
     213      ASSERT (s >= 5);
     214      s = n_to_bit (s);
     215      ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n);
     216      ASSERT (s < n_to_bit (n / 3));
     217      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
     218      SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
     219      LOOP_ON_SIEVE_STOP;
     220  
     221      ASSERT (max_prod <= GMP_NUMB_MAX / 3);
     222  
     223      l_max_prod = max_prod * 3;
     224  
     225      LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3));
     226      SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
     227      LOOP_ON_SIEVE_END;
     228    }
     229  
     230    /* Store primes from (n+1)/2 to n */
     231    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
     232    FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
     233    LOOP_ON_SIEVE_END;
     234  
     235    if (LIKELY (j != 0))
     236      {
     237        factors[j++] = prod;
     238        mpz_prodlimbs (x, factors, j);
     239      }
     240    else
     241      {
     242        ASSERT (ALLOC (x) > 0);
     243        PTR (x)[0] = prod;
     244        SIZ (x) = 1;
     245      }
     246  }
     247  
     248  #undef SWING_A_PRIME
     249  #undef SH_SWING_A_PRIME
     250  #undef LOOP_ON_SIEVE_END
     251  #undef LOOP_ON_SIEVE_STOP
     252  #undef LOOP_ON_SIEVE_BEGIN
     253  #undef LOOP_ON_SIEVE_CONTINUE
     254  #undef FACTOR_LIST_APPEND
     255  
     256  /*********************************************************/
     257  /* Section oddfac: odd factorial, needed also by binomial*/
     258  /*********************************************************/
     259  
     260  /* FIXME: refine che following estimate. */
     261  
     262  #if TUNE_PROGRAM_BUILD
     263  #define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD_LIMIT*FAC_DSC_THRESHOLD_LIMIT-1)+1) - 1)
     264  #else
     265  #define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD*FAC_DSC_THRESHOLD-1)+1) - 1)
     266  #endif
     267  
     268  /* mpz_oddfac_1 computes the odd part of the factorial of the
     269     parameter n.  I.e. n! = x 2^a, where x is the returned value: an
     270     odd positive integer.
     271  
     272     If flag != 0 a square is skipped in the DSC part, e.g.
     273     if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
     274  
     275     If n is too small, flag is ignored, and an ASSERT can be triggered.
     276  
     277     TODO: FAC_DSC_THRESHOLD is used here with two different roles:
     278      - to decide when prime factorisation is needed,
     279      - to stop the recursion, once sieving is done.
     280     Maybe two thresholds can do a better job.
     281   */
     282  void
     283  mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
     284  {
     285    ASSERT (n <= GMP_NUMB_MAX);
     286    ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
     287  
     288    if (n <= ODD_FACTORIAL_TABLE_LIMIT)
     289      {
     290        MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n];
     291        SIZ (x) = 1;
     292      }
     293    else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
     294      {
     295        mp_ptr   px;
     296  
     297        px = MPZ_NEWALLOC (x, 2);
     298        umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
     299        SIZ (x) = 2;
     300      }
     301    else
     302      {
     303        unsigned s;
     304        mp_ptr   factors;
     305  
     306        s = 0;
     307        {
     308  	mp_limb_t tn;
     309  	mp_limb_t prod, max_prod;
     310  	mp_size_t j;
     311  	TMP_SDECL;
     312  
     313  #if TUNE_PROGRAM_BUILD
     314  	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
     315  	ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
     316  #endif
     317  
     318  	/* Compute the number of recursive steps for the DSC algorithm. */
     319  	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
     320  	  tn >>= 1;
     321  
     322  	j = 0;
     323  
     324  	TMP_SMARK;
     325  	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
     326  	ASSERT (tn >= FACTORS_PER_LIMB);
     327  
     328  	prod = 1;
     329  #if TUNE_PROGRAM_BUILD
     330  	max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
     331  #else
     332  	max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD * FAC_DSC_THRESHOLD);
     333  #endif
     334  
     335  	ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
     336  	do {
     337  	  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
     338  	  mp_limb_t diff = (tn - ODD_DOUBLEFACTORIAL_TABLE_LIMIT) & -CNST_LIMB (2);
     339  	  if ((diff & 2) != 0)
     340  	    {
     341  	      FACTOR_LIST_STORE (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff, prod, max_prod, factors, j);
     342  	      diff -= 2;
     343  	    }
     344  	  if (diff != 0)
     345  	    {
     346  	      mp_limb_t fac = (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2) *
     347  		(ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff);
     348  	      do {
     349  		FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
     350  		diff -= 4;
     351  		fac += diff * 2;
     352  	      } while (diff != 0);
     353  	    }
     354  	  max_prod <<= 2;
     355  	  tn >>= 1;
     356  	} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
     357  
     358  	factors[j++] = prod;
     359  	factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
     360  	factors[j++] = __gmp_oddfac_table[tn >> 1];
     361  	mpz_prodlimbs (x, factors, j);
     362  
     363  	TMP_SFREE;
     364        }
     365  
     366        if (s != 0)
     367  	/* Use the algorithm described by Peter Luschny in "Divide,
     368  	   Swing and Conquer the Factorial!".
     369  
     370  	   Improvement: there are two temporary buffers, factors and
     371  	   square, that are never used together; with a good estimate
     372  	   of the maximal needed size, they could share a single
     373  	   allocation.
     374  	*/
     375  	{
     376  	  mpz_t mswing;
     377  	  mp_ptr sieve;
     378  	  mp_size_t size;
     379  	  TMP_DECL;
     380  
     381  	  TMP_MARK;
     382  
     383  	  flag--;
     384  	  size = n / GMP_NUMB_BITS + 4;
     385  	  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
     386  	  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
     387  	     one more can be overwritten by mul, another for the sieve */
     388  	  MPZ_TMP_INIT (mswing, size);
     389  	  /* Initialize size, so that ASSERT can check it correctly. */
     390  	  ASSERT_CODE (SIZ (mswing) = 0);
     391  
     392  	  /* Put the sieve on the second half, it will be overwritten by the last mswing. */
     393  	  sieve = PTR (mswing) + size / 2 + 1;
     394  
     395  	  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
     396  
     397  	  factors = TMP_ALLOC_LIMBS (size);
     398  	  do {
     399  	    mp_ptr    square, px;
     400  	    mp_size_t nx, ns;
     401  	    mp_limb_t cy;
     402  	    TMP_DECL;
     403  
     404  	    s--;
     405  	    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
     406  	    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
     407  
     408  	    TMP_MARK;
     409  	    nx = SIZ (x);
     410  	    if (s == flag) {
     411  	      size = nx;
     412  	      square = TMP_ALLOC_LIMBS (size);
     413  	      MPN_COPY (square, PTR (x), nx);
     414  	    } else {
     415  	      size = nx << 1;
     416  	      square = TMP_ALLOC_LIMBS (size);
     417  	      mpn_sqr (square, PTR (x), nx);
     418  	      size -= (square[size - 1] == 0);
     419  	    }
     420  	    ns = SIZ (mswing);
     421  	    nx = size + ns;
     422  	    px = MPZ_NEWALLOC (x, nx);
     423  	    ASSERT (ns <= size);
     424  	    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
     425  
     426  	    SIZ(x) = nx - (cy == 0);
     427  	    TMP_FREE;
     428  	  } while (s != 0);
     429  	  TMP_FREE;
     430  	}
     431      }
     432  }
     433  
     434  #undef FACTORS_PER_LIMB
     435  #undef FACTOR_LIST_STORE