1  /*
       2   * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
       3   *
       4   * Redistribution and use in source and binary forms, with or without
       5   * modification, are permitted provided that the following conditions
       6   * are met:
       7   *
       8   * 1. Redistributions of source code must retain the above copyright
       9   *    notice, this list of conditions and the following disclaimer.
      10   *
      11   * 2. Redistributions in binary form must reproduce the above copyright
      12   *    notice, this list of conditions and the following disclaimer in the
      13   *    documentation and/or other materials provided with the distribution.
      14   *
      15   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
      16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      19   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      20   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      21   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      22   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      23   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      24   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      25   * SUCH DAMAGE.
      26   */
      27  
      28  
      29  #include "mpdecimal.h"
      30  
      31  #include <assert.h>
      32  
      33  #include "bits.h"
      34  #include "constants.h"
      35  #include "difradix2.h"
      36  #include "numbertheory.h"
      37  #include "umodarith.h"
      38  
      39  
      40  /* Bignum: The actual transform routine (decimation in frequency). */
      41  
      42  
      43  /*
      44   * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
      45   * n must be a power of two.
      46   * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
      47   * Chapter 1.14.4. [http://www.jjj.de/fxt/]
      48   */
      49  static inline void
      50  bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
      51  {
      52      mpd_size_t x = 0;
      53      mpd_size_t r = 0;
      54      mpd_uint_t t;
      55  
      56      do { /* Invariant: r = bitreverse(x) */
      57          if (r > x) {
      58              t = a[x];
      59              a[x] = a[r];
      60              a[r] = t;
      61          }
      62          /* Flip trailing consecutive 1 bits and the first zero bit
      63           * that absorbs a possible carry. */
      64          x += 1;
      65          /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
      66             high bits of r. */
      67          r ^= (n - (n >> (mpd_bsf(x)+1)));
      68          /* The loop invariant is preserved. */
      69      } while (x < n);
      70  }
      71  
      72  
      73  /* Fast Number Theoretic Transform, decimation in frequency. */
      74  void
      75  fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
      76  {
      77      mpd_uint_t *wtable = tparams->wtable;
      78      mpd_uint_t umod;
      79  #ifdef PPRO
      80      double dmod;
      81      uint32_t dinvmod[3];
      82  #endif
      83      mpd_uint_t u0, u1, v0, v1;
      84      mpd_uint_t w, w0, w1, wstep;
      85      mpd_size_t m, mhalf;
      86      mpd_size_t j, r;
      87  
      88  
      89      assert(ispower2(n));
      90      assert(n >= 4);
      91  
      92      SETMODULUS(tparams->modnum);
      93  
      94      /* m == n */
      95      mhalf = n / 2;
      96      for (j = 0; j < mhalf; j += 2) {
      97  
      98          w0 = wtable[j];
      99          w1 = wtable[j+1];
     100  
     101          u0 = a[j];
     102          v0 = a[j+mhalf];
     103  
     104          u1 = a[j+1];
     105          v1 = a[j+1+mhalf];
     106  
     107          a[j] = addmod(u0, v0, umod);
     108          v0 = submod(u0, v0, umod);
     109  
     110          a[j+1] = addmod(u1, v1, umod);
     111          v1 = submod(u1, v1, umod);
     112  
     113          MULMOD2(&v0, w0, &v1, w1);
     114  
     115          a[j+mhalf] = v0;
     116          a[j+1+mhalf] = v1;
     117  
     118      }
     119  
     120      wstep = 2;
     121      for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
     122  
     123          mhalf = m / 2;
     124  
     125          /* j == 0 */
     126          for (r = 0; r < n; r += 2*m) {
     127  
     128              u0 = a[r];
     129              v0 = a[r+mhalf];
     130  
     131              u1 = a[m+r];
     132              v1 = a[m+r+mhalf];
     133  
     134              a[r] = addmod(u0, v0, umod);
     135              v0 = submod(u0, v0, umod);
     136  
     137              a[m+r] = addmod(u1, v1, umod);
     138              v1 = submod(u1, v1, umod);
     139  
     140              a[r+mhalf] = v0;
     141              a[m+r+mhalf] = v1;
     142          }
     143  
     144          for (j = 1; j < mhalf; j++) {
     145  
     146              w = wtable[j*wstep];
     147  
     148              for (r = 0; r < n; r += 2*m) {
     149  
     150                  u0 = a[r+j];
     151                  v0 = a[r+j+mhalf];
     152  
     153                  u1 = a[m+r+j];
     154                  v1 = a[m+r+j+mhalf];
     155  
     156                  a[r+j] = addmod(u0, v0, umod);
     157                  v0 = submod(u0, v0, umod);
     158  
     159                  a[m+r+j] = addmod(u1, v1, umod);
     160                  v1 = submod(u1, v1, umod);
     161  
     162                  MULMOD2C(&v0, &v1, w);
     163  
     164                  a[r+j+mhalf] = v0;
     165                  a[m+r+j+mhalf] = v1;
     166              }
     167  
     168          }
     169  
     170      }
     171  
     172      bitreverse_permute(a, n);
     173  }