1  /* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
       2  
       3  Copyright 1998-2002, 2012, 2013, 2015, 2017-2018, 2020 Free Software
       4  Foundation, Inc.
       5  
       6  This file is part of the GNU MP Library.
       7  
       8  The GNU MP Library is free software; you can redistribute it and/or modify
       9  it under the terms of either:
      10  
      11    * the GNU Lesser General Public License as published by the Free
      12      Software Foundation; either version 3 of the License, or (at your
      13      option) any later version.
      14  
      15  or
      16  
      17    * the GNU General Public License as published by the Free Software
      18      Foundation; either version 2 of the License, or (at your option) any
      19      later version.
      20  
      21  or both in parallel, as here.
      22  
      23  The GNU MP Library is distributed in the hope that it will be useful, but
      24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      26  for more details.
      27  
      28  You should have received copies of the GNU General Public License and the
      29  GNU Lesser General Public License along with the GNU MP Library.  If not,
      30  see https://www.gnu.org/licenses/.  */
      31  
      32  #include "gmp-impl.h"
      33  
      34  /* How many special cases? Minimum is 2: 0 and 1;
      35   * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
      36   */
      37  #define APARTAJ_KALKULOJ 2
      38  
      39  /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
      40   * the operands fit.
      41   */
      42  #define UZU_BIN_UIUI 0
      43  
      44  /* Whether to use a shortcut to precompute the product of four
      45   * elements (1), or precompute only the product of a couple (0).
      46   *
      47   * In both cases the precomputed product is then updated with some
      48   * linear operations to obtain the product of the next four (1)
      49   * [or two (0)] operands.
      50   */
      51  #define KVAROPE 1
      52  
      53  static void
      54  posmpz_init (mpz_ptr r)
      55  {
      56    mp_ptr rp;
      57    ASSERT (SIZ (r) > 0);
      58    rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
      59    *rp = 0;
      60    *++rp = 0;
      61  }
      62  
      63  /* Equivalent to mpz_add_ui (r, r, in), but faster when
      64     0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
      65  static void
      66  posmpz_inc_ui (mpz_ptr r, unsigned long in)
      67  {
      68  #if BITS_PER_ULONG > GMP_NUMB_BITS
      69    mpz_add_ui (r, r, in);
      70  #else
      71    ASSERT (SIZ (r) > 0);
      72    MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
      73    SIZ (r) += PTR (r)[SIZ (r)];
      74  #endif
      75  }
      76  
      77  /* Equivalent to mpz_sub_ui (r, r, in), but faster when
      78     0 < SIZ (r) and we know in advance that the result is positive. */
      79  static void
      80  posmpz_dec_ui (mpz_ptr r, unsigned long in)
      81  {
      82  #if BITS_PER_ULONG > GMP_NUMB_BITS
      83    mpz_sub_ui (r, r, in);
      84  #else
      85    ASSERT (mpz_cmp_ui (r, in) >= 0);
      86    MPN_DECR_U (PTR (r), SIZ (r), in);
      87    SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
      88  #endif
      89  }
      90  
      91  /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
      92     0 < SIZ (r) and we know in advance that the result is positive. */
      93  static void
      94  posmpz_rsh1 (mpz_ptr r)
      95  {
      96    mp_ptr rp;
      97    mp_size_t rn;
      98  
      99    rn = SIZ (r);
     100    rp = PTR (r);
     101    ASSERT (rn > 0);
     102    mpn_rshift (rp, rp, rn, 1);
     103    SIZ (r) -= rp[rn - 1] == 0;
     104  }
     105  
     106  /* Computes r = n(n+(2*k-1))/2
     107     It uses a sqare instead of a product, computing
     108     r = ((n+k-1)^2 + n - (k-1)^2)/2
     109     As a side effect, sets t = n+k-1
     110   */
     111  static void
     112  mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
     113  {
     114    ASSERT (k > 0 && SIZ(n) > 0);
     115    --k;
     116    mpz_add_ui (t, n, k);
     117    mpz_mul (r, t, t);
     118    mpz_add (r, r, n);
     119    posmpz_rsh1 (r);
     120    if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
     121      posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
     122    else
     123      {
     124        mpz_t tmp;
     125        mpz_init_set_ui (tmp, (k + (k & 1)));
     126        mpz_mul_ui (tmp, tmp, k >> 1);
     127        mpz_sub (r, r, tmp);
     128        mpz_clear (tmp);
     129      }
     130  }
     131  
     132  #if KVAROPE
     133  static void
     134  rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
     135  {
     136    if (k - lk < 5)
     137      {
     138        do {
     139  	posmpz_inc_ui (p, 4*k+2);
     140  	mpz_addmul_ui (P, p, 4*k);
     141  	posmpz_dec_ui (P, k);
     142  	mpz_mul (r, r, P);
     143        } while (--k > lk);
     144      }
     145    else
     146      {
     147        mpz_t lt;
     148        unsigned long int m;
     149  
     150        ALLOC (lt) = 0;
     151        SIZ (lt) = 0;
     152        if (t == NULL)
     153  	t = lt;
     154        m = ((k + lk) >> 1) + 1;
     155        rek_raising_fac4 (r, p, P, k, m, t);
     156  
     157        posmpz_inc_ui (p, 4*m+2);
     158        mpz_addmul_ui (P, p, 4*m);
     159        posmpz_dec_ui (P, m);
     160        mpz_set (t, P);
     161        rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
     162  
     163        mpz_mul (r, r, t);
     164        mpz_clear (lt);
     165      }
     166  }
     167  
     168  /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
     169     rek_raising_fac4, and exploiting an idea inspired by a piece of
     170     code that Fredrik Johansson wrote and by a comment by Niels Möller.
     171  
     172     Assume k = 4i then compute:
     173       p  = (n+1)(n+4i)/2 - i
     174  	  (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
     175  	  (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
     176       P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
     177       n' = n + 2
     178       i' = i - 1
     179  	  (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
     180  	  (n'-1)(n'+4i'+2)/2 - i' - 1 = p
     181  	  (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
     182  	  (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
     183  	  (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
     184       p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
     185  	  p' - 4i' - 2 = p
     186  	  (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
     187  	  (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
     188  	  (p' - 3i' - 1)*(p' - i')/2 = P
     189  	  (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
     190  	  (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
     191  	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
     192  	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
     193       P' = P + 4i'p' - i'
     194  
     195     And compute the product P * P' * P" ...
     196   */
     197  
     198  static void
     199  mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
     200  {
     201    ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
     202    posmpz_init (n);
     203    posmpz_inc_ui (n, 1);
     204    SIZ (r) = 0;
     205    if (k & 1)
     206      {
     207        mpz_set (r, n);
     208        posmpz_inc_ui (n, 1);
     209      }
     210    k >>= 1;
     211    if (APARTAJ_KALKULOJ < 2 && k == 0)
     212      return;
     213  
     214    mpz_hmul_nbnpk (p, n, k, t);
     215    posmpz_init (p);
     216  
     217    if (k & 1)
     218      {
     219        if (SIZ (r))
     220  	mpz_mul (r, r, p);
     221        else
     222  	mpz_set (r, p);
     223        posmpz_inc_ui (p, k - 1);
     224      }
     225    k >>= 1;
     226    if (APARTAJ_KALKULOJ < 4 && k == 0)
     227      return;
     228  
     229    mpz_hmul_nbnpk (t, p, k, n);
     230    if (SIZ (r))
     231      mpz_mul (r, r, t);
     232    else
     233      mpz_set (r, t);
     234  
     235    if (APARTAJ_KALKULOJ > 8 || k > 1)
     236      {
     237        posmpz_dec_ui (p, k);
     238        rek_raising_fac4 (r, p, t, k - 1, 0, n);
     239      }
     240  }
     241  
     242  #else /* KVAROPE */
     243  
     244  static void
     245  rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
     246  {
     247    /* Should the threshold depend on SIZ (n) ? */
     248    if (k - lk < 10)
     249      {
     250        do {
     251  	posmpz_inc_ui (n, k);
     252  	mpz_mul (r, r, n);
     253  	--k;
     254        } while (k > lk);
     255      }
     256    else
     257      {
     258        mpz_t t3;
     259        unsigned long int m;
     260  
     261        m = ((k + lk) >> 1) + 1;
     262        rek_raising_fac (r, n, k, m, t1, t2);
     263  
     264        posmpz_inc_ui (n, m);
     265        if (t1 == NULL)
     266  	{
     267  	  mpz_init_set (t3, n);
     268  	  t1 = t3;
     269  	}
     270        else
     271  	{
     272  	  ALLOC (t3) = 0;
     273  	  mpz_set (t1, n);
     274  	}
     275        rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
     276  
     277        mpz_mul (r, r, t1);
     278        mpz_clear (t3);
     279      }
     280  }
     281  
     282  /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
     283     rek_raising_fac, and exploiting an idea inspired by a piece of
     284     code that Fredrik Johansson wrote.
     285  
     286     Force an even k = 2i then compute:
     287       p  = (n+1)(n+2i)/2
     288       i' = i - 1
     289       p == (n+1)(n+2i'+2)/2
     290       p' = p + i' == (n+2)(n+2i'+1)/2
     291       n' = n + 1
     292       p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
     293  
     294     And compute the product p * p' * p" ...
     295  */
     296  
     297  static void
     298  mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
     299  {
     300    unsigned long int hk;
     301    ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
     302    mpz_add_ui (n, n, 1);
     303    hk = k >> 1;
     304    mpz_hmul_nbnpk (p, n, hk, t);
     305  
     306    if ((k & 1) != 0)
     307      {
     308        mpz_add_ui (t, t, hk + 1);
     309        mpz_mul (r, t, p);
     310      }
     311    else
     312      {
     313        mpz_set (r, p);
     314      }
     315  
     316    if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
     317      {
     318        posmpz_init (p);
     319        rek_raising_fac (r, p, hk - 1, 0, t, n);
     320      }
     321  }
     322  #endif /* KVAROPE */
     323  
     324  /* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
     325     In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
     326     the code here only for big n.
     327  
     328     The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
     329     1 section 1.2.6 part G. */
     330  
     331  void
     332  mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
     333  {
     334    mpz_t      ni;
     335    mp_size_t  negate;
     336  
     337    if (SIZ (n) < 0)
     338      {
     339        /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
     340        mpz_init (ni);
     341        mpz_add_ui (ni, n, 1L);
     342        mpz_neg (ni, ni);
     343        negate = (k & 1);   /* (-1)^k */
     344      }
     345    else
     346      {
     347        /* bin(n,k) == 0 if k>n
     348  	 (no test for this under the n<0 case, since -n+k-1 >= k there) */
     349        if (mpz_cmp_ui (n, k) < 0)
     350  	{
     351  	  SIZ (r) = 0;
     352  	  return;
     353  	}
     354  
     355        /* set ni = n-k */
     356        mpz_init (ni);
     357        mpz_sub_ui (ni, n, k);
     358        negate = 0;
     359      }
     360  
     361    /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
     362       for positive, 1 for negative). */
     363  
     364    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
     365       whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
     366       = ni, and new ni of ni+k-ni = k.  */
     367    if (mpz_cmp_ui (ni, k) < 0)
     368      {
     369        unsigned long  tmp;
     370        tmp = k;
     371        k = mpz_get_ui (ni);
     372        mpz_set_ui (ni, tmp);
     373      }
     374  
     375    if (k < APARTAJ_KALKULOJ)
     376      {
     377        if (k == 0)
     378  	{
     379  	  SIZ (r) = 1;
     380  	  MPZ_NEWALLOC (r, 1)[0] = 1;
     381  	}
     382  #if APARTAJ_KALKULOJ > 2
     383        else if (k > 1)
     384  	{
     385  	  mpz_add_ui (ni, ni, 1 + (APARTAJ_KALKULOJ > 2 && k > 2));
     386  	  mpz_mul (r, ni, ni); /* r = (n + (k>2))^2 */
     387  	  if (APARTAJ_KALKULOJ == 2 || k == 2)
     388  	    {
     389  	      mpz_add (r, r, ni); /* n^2+n= n(n+1) */
     390  	      posmpz_rsh1 (r);
     391  	    }
     392  #if APARTAJ_KALKULOJ > 3
     393  #if APARTAJ_KALKULOJ != 5
     394  #error Not implemented! 3 < APARTAJ_KALKULOJ != 5
     395  #endif
     396  	  else /* k > 2 */
     397  	    { /* k = 3, 4 */
     398  	      mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
     399  	      if (k == 3)
     400  		{
     401  		  mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
     402  		  /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
     403  		}
     404  	      else /* k = 4 */
     405  		{
     406  		  mpz_add (ni, ni, r); /* (n+1)^2+n */
     407  		  mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
     408  		  /* We should subtract one: ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3). */
     409  		  /* PTR (r) [0] ^= 1; would suffice, but it is not even needed, */
     410  		  /* because the next division will shift away this bit anyway.  */
     411  		  /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
     412  		}
     413  	      mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1 | (k>>1));
     414  	      SIZ(r) -= PTR(r) [SIZ(r) - 1] == 0;
     415  	    }
     416  #endif
     417  	}
     418  #endif
     419        else
     420  	{ /* k = 1 */
     421  	  mpz_add_ui (r, ni, 1);
     422  	}
     423      }
     424  #if UZU_BIN_UIUI
     425    else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
     426      {
     427        mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
     428      }
     429  #endif
     430    else
     431      {
     432        mp_limb_t count;
     433        mpz_t num, den;
     434  
     435        mpz_init (num);
     436        mpz_init (den);
     437  
     438  #if KVAROPE
     439        mpz_raising_fac4 (num, ni, k, den, r);
     440        popc_limb (count, k);
     441        ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
     442        mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
     443  #else
     444        mpz_raising_fac (num, ni, k, den, r);
     445        popc_limb (count, k);
     446        ASSERT (k - (k >> 1) - count >= 0);
     447        mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
     448  #endif
     449  
     450        mpz_oddfac_1(den, k, 0);
     451  
     452        mpz_divexact(r, num, den);
     453        mpz_clear (num);
     454        mpz_clear (den);
     455      }
     456    mpz_clear (ni);
     457  
     458    SIZ(r) = (SIZ(r) ^ -negate) + negate;
     459  }