1  /* mpz_lucnum_ui -- calculate Lucas number.
       2  
       3  Copyright 2001, 2003, 2005, 2011, 2012, 2015, 2016 Free Software Foundation, Inc.
       4  
       5  This file is part of the GNU MP Library.
       6  
       7  The GNU MP Library is free software; you can redistribute it and/or modify
       8  it under the terms of either:
       9  
      10    * the GNU Lesser General Public License as published by the Free
      11      Software Foundation; either version 3 of the License, or (at your
      12      option) any later version.
      13  
      14  or
      15  
      16    * the GNU General Public License as published by the Free Software
      17      Foundation; either version 2 of the License, or (at your option) any
      18      later version.
      19  
      20  or both in parallel, as here.
      21  
      22  The GNU MP Library is distributed in the hope that it will be useful, but
      23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      25  for more details.
      26  
      27  You should have received copies of the GNU General Public License and the
      28  GNU Lesser General Public License along with the GNU MP Library.  If not,
      29  see https://www.gnu.org/licenses/.  */
      30  
      31  #include <stdio.h>
      32  #include "gmp-impl.h"
      33  
      34  
      35  /* change this to "#define TRACE(x) x" for diagnostics */
      36  #define TRACE(x)
      37  
      38  
      39  /* Notes:
      40  
      41     For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
      42     there can't be an overflow applying +4 to just the low limb (since that
      43     would leave 0, 1, 2 or 3 mod 8).
      44  
      45     For the -4 in L[2k+1] when k is even, it seems (no proof) that
      46     L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
      47     L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
      48     low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
      49     conceivable to calculate it, so it probably should be handled.
      50  
      51     For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
      52     2^b, so for instance in 32-bits L[0x80000000] has a low limb of
      53     0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
      54     obviously huge, but probably should be made to work.  */
      55  
      56  void
      57  mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
      58  {
      59    mp_size_t  lalloc, xalloc, lsize, xsize;
      60    mp_ptr     lp, xp;
      61    mp_limb_t  c;
      62    int        zeros;
      63    TMP_DECL;
      64  
      65    TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
      66  
      67    if (n <= FIB_TABLE_LUCNUM_LIMIT)
      68      {
      69        /* L[n] = F[n] + 2F[n-1] */
      70        MPZ_NEWALLOC (ln, 1)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
      71        SIZ(ln) = 1;
      72        return;
      73      }
      74  
      75    /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
      76       since square or mul used below might need an extra limb over the true
      77       size */
      78    lalloc = MPN_FIB2_SIZE (n) + 2;
      79    lp = MPZ_NEWALLOC (ln, lalloc);
      80  
      81    TMP_MARK;
      82    xalloc = lalloc;
      83    xp = TMP_ALLOC_LIMBS (xalloc);
      84  
      85    /* Strip trailing zeros from n, until either an odd number is reached
      86       where the L[2k+1] formula can be used, or until n fits within the
      87       FIB_TABLE data.  The table is preferred of course.  */
      88    zeros = 0;
      89    for (;;)
      90      {
      91        if (n & 1)
      92  	{
      93  	  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
      94  
      95  	  mp_size_t  yalloc, ysize;
      96  	  mp_ptr     yp;
      97  
      98  	  TRACE (printf ("  initial odd n=%lu\n", n));
      99  
     100  	  yalloc = MPN_FIB2_SIZE (n/2);
     101  	  yp = TMP_ALLOC_LIMBS (yalloc);
     102  	  ASSERT (xalloc >= yalloc);
     103  
     104  	  xsize = mpn_fib2_ui (xp, yp, n/2);
     105  
     106  	  /* possible high zero on F[k-1] */
     107  	  ysize = xsize;
     108  	  ysize -= (yp[ysize-1] == 0);
     109  	  ASSERT (yp[ysize-1] != 0);
     110  
     111  	  /* xp = 2*F[k] + F[k-1] */
     112  #if HAVE_NATIVE_mpn_addlsh1_n
     113  	  c = mpn_addlsh1_n (xp, yp, xp, xsize);
     114  #else
     115  	  c = mpn_lshift (xp, xp, xsize, 1);
     116  	  c += mpn_add_n (xp, xp, yp, xsize);
     117  #endif
     118  	  ASSERT (xalloc >= xsize+1);
     119  	  xp[xsize] = c;
     120  	  xsize += (c != 0);
     121  	  ASSERT (xp[xsize-1] != 0);
     122  
     123  	  ASSERT (lalloc >= xsize + ysize);
     124  	  c = mpn_mul (lp, xp, xsize, yp, ysize);
     125  	  lsize = xsize + ysize;
     126  	  lsize -= (c == 0);
     127  
     128  	  /* lp = 5*lp */
     129  #if HAVE_NATIVE_mpn_addlsh2_n
     130  	  c = mpn_addlsh2_n (lp, lp, lp, lsize);
     131  #else
     132  	  /* FIXME: Is this faster than mpn_mul_1 ? */
     133  	  c = mpn_lshift (xp, lp, lsize, 2);
     134  	  c += mpn_add_n (lp, lp, xp, lsize);
     135  #endif
     136  	  ASSERT (lalloc >= lsize+1);
     137  	  lp[lsize] = c;
     138  	  lsize += (c != 0);
     139  
     140  	  /* lp = lp - 4*(-1)^k */
     141  	  if (n & 2)
     142  	    {
     143  	      /* no overflow, see comments above */
     144  	      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
     145  	      lp[0] += 4;
     146  	    }
     147  	  else
     148  	    {
     149  	      /* won't go negative */
     150  	      MPN_DECR_U (lp, lsize, CNST_LIMB(4));
     151  	    }
     152  
     153  	  TRACE (mpn_trace ("  l",lp, lsize));
     154  	  break;
     155  	}
     156  
     157        MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
     158        zeros++;
     159        n /= 2;
     160  
     161        if (n <= FIB_TABLE_LUCNUM_LIMIT)
     162  	{
     163  	  /* L[n] = F[n] + 2F[n-1] */
     164  	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
     165  	  lsize = 1;
     166  
     167  	  TRACE (printf ("  initial small n=%lu\n", n);
     168  		 mpn_trace ("  l",lp, lsize));
     169  	  break;
     170  	}
     171      }
     172  
     173    for ( ; zeros != 0; zeros--)
     174      {
     175        /* L[2k] = L[k]^2 + 2*(-1)^k */
     176  
     177        TRACE (printf ("  zeros=%d\n", zeros));
     178  
     179        ASSERT (xalloc >= 2*lsize);
     180        mpn_sqr (xp, lp, lsize);
     181        lsize *= 2;
     182        lsize -= (xp[lsize-1] == 0);
     183  
     184        /* First time around the loop k==n determines (-1)^k, after that k is
     185  	 always even and we set n=0 to indicate that.  */
     186        if (n & 1)
     187  	{
     188  	  /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
     189  	  ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
     190  	  xp[0] += 2;
     191  	  n = 0;
     192  	}
     193        else
     194  	{
     195  	  /* won't go negative */
     196  	  MPN_DECR_U (xp, lsize, CNST_LIMB(2));
     197  	}
     198  
     199        MP_PTR_SWAP (xp, lp);
     200        ASSERT (lp[lsize-1] != 0);
     201      }
     202  
     203    /* should end up in the right spot after all the xp/lp swaps */
     204    ASSERT (lp == PTR(ln));
     205    SIZ(ln) = lsize;
     206  
     207    TMP_FREE;
     208  }