1  /*
       2   * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
       3   *
       4   * Redistribution and use in source and binary forms, with or without
       5   * modification, are permitted provided that the following conditions
       6   * are met:
       7   *
       8   * 1. Redistributions of source code must retain the above copyright
       9   *    notice, this list of conditions and the following disclaimer.
      10   *
      11   * 2. Redistributions in binary form must reproduce the above copyright
      12   *    notice, this list of conditions and the following disclaimer in the
      13   *    documentation and/or other materials provided with the distribution.
      14   *
      15   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
      16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      19   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      20   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      21   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      22   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      23   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      24   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      25   * SUCH DAMAGE.
      26   */
      27  
      28  
      29  #include "mpdecimal.h"
      30  #include "bits.h"
      31  #include "constants.h"
      32  #include "convolute.h"
      33  #include "fnt.h"
      34  #include "fourstep.h"
      35  #include "numbertheory.h"
      36  #include "sixstep.h"
      37  #include "umodarith.h"
      38  
      39  
      40  /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
      41     the multiplication of very large coefficients. */
      42  
      43  
      44  /* Convolute the data in c1 and c2. Result is in c1. */
      45  int
      46  fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
      47  {
      48      int (*fnt)(mpd_uint_t *, mpd_size_t, int);
      49      int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
      50  #ifdef PPRO
      51      double dmod;
      52      uint32_t dinvmod[3];
      53  #endif
      54      mpd_uint_t n_inv, umod;
      55      mpd_size_t i;
      56  
      57  
      58      SETMODULUS(modnum);
      59      n_inv = POWMOD(n, (umod-2));
      60  
      61      if (ispower2(n)) {
      62          if (n > SIX_STEP_THRESHOLD) {
      63              fnt = six_step_fnt;
      64              inv_fnt = inv_six_step_fnt;
      65          }
      66          else {
      67              fnt = std_fnt;
      68              inv_fnt = std_inv_fnt;
      69          }
      70      }
      71      else {
      72          fnt = four_step_fnt;
      73          inv_fnt = inv_four_step_fnt;
      74      }
      75  
      76      if (!fnt(c1, n, modnum)) {
      77          return 0;
      78      }
      79      if (!fnt(c2, n, modnum)) {
      80          return 0;
      81      }
      82      for (i = 0; i < n-1; i += 2) {
      83          mpd_uint_t x0 = c1[i];
      84          mpd_uint_t y0 = c2[i];
      85          mpd_uint_t x1 = c1[i+1];
      86          mpd_uint_t y1 = c2[i+1];
      87          MULMOD2(&x0, y0, &x1, y1);
      88          c1[i] = x0;
      89          c1[i+1] = x1;
      90      }
      91  
      92      if (!inv_fnt(c1, n, modnum)) {
      93          return 0;
      94      }
      95      for (i = 0; i < n-3; i += 4) {
      96          mpd_uint_t x0 = c1[i];
      97          mpd_uint_t x1 = c1[i+1];
      98          mpd_uint_t x2 = c1[i+2];
      99          mpd_uint_t x3 = c1[i+3];
     100          MULMOD2C(&x0, &x1, n_inv);
     101          MULMOD2C(&x2, &x3, n_inv);
     102          c1[i] = x0;
     103          c1[i+1] = x1;
     104          c1[i+2] = x2;
     105          c1[i+3] = x3;
     106      }
     107  
     108      return 1;
     109  }
     110  
     111  /* Autoconvolute the data in c1. Result is in c1. */
     112  int
     113  fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
     114  {
     115      int (*fnt)(mpd_uint_t *, mpd_size_t, int);
     116      int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
     117  #ifdef PPRO
     118      double dmod;
     119      uint32_t dinvmod[3];
     120  #endif
     121      mpd_uint_t n_inv, umod;
     122      mpd_size_t i;
     123  
     124  
     125      SETMODULUS(modnum);
     126      n_inv = POWMOD(n, (umod-2));
     127  
     128      if (ispower2(n)) {
     129          if (n > SIX_STEP_THRESHOLD) {
     130              fnt = six_step_fnt;
     131              inv_fnt = inv_six_step_fnt;
     132          }
     133          else {
     134              fnt = std_fnt;
     135              inv_fnt = std_inv_fnt;
     136          }
     137      }
     138      else {
     139          fnt = four_step_fnt;
     140          inv_fnt = inv_four_step_fnt;
     141      }
     142  
     143      if (!fnt(c1, n, modnum)) {
     144          return 0;
     145      }
     146      for (i = 0; i < n-1; i += 2) {
     147          mpd_uint_t x0 = c1[i];
     148          mpd_uint_t x1 = c1[i+1];
     149          MULMOD2(&x0, x0, &x1, x1);
     150          c1[i] = x0;
     151          c1[i+1] = x1;
     152      }
     153  
     154      if (!inv_fnt(c1, n, modnum)) {
     155          return 0;
     156      }
     157      for (i = 0; i < n-3; i += 4) {
     158          mpd_uint_t x0 = c1[i];
     159          mpd_uint_t x1 = c1[i+1];
     160          mpd_uint_t x2 = c1[i+2];
     161          mpd_uint_t x3 = c1[i+3];
     162          MULMOD2C(&x0, &x1, n_inv);
     163          MULMOD2C(&x2, &x3, n_inv);
     164          c1[i] = x0;
     165          c1[i+1] = x1;
     166          c1[i+2] = x2;
     167          c1[i+3] = x3;
     168      }
     169  
     170      return 1;
     171  }