1  /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
       2  
       3     Contributed to the GNU project by Robert Harley.
       4     Improvements by Paul Zimmermann and Marco Bodrato.
       5  
       6     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       7     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       8     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       9  
      10  Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  #include "gmp-impl.h"
      39  
      40  void
      41  mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
      42  			   mp_size_t k, mp_size_t twor, int sa,
      43  			   mp_limb_t vinf0)
      44  {
      45    mp_limb_t cy, saved;
      46    mp_size_t twok;
      47    mp_size_t kk1;
      48    mp_ptr c1, v1, c3, vinf;
      49  
      50    twok = k + k;
      51    kk1 = twok + 1;
      52  
      53    c1 = c  + k;
      54    v1 = c1 + k;
      55    c3 = v1 + k;
      56    vinf = c3 + k;
      57  
      58  #define v0 (c)
      59    /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
      60       thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
      61    */
      62    if (sa)
      63      ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
      64    else
      65      ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
      66  
      67    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
      68         v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
      69  
      70    ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
      71  						      /* (5 3 1 1 0)*/
      72  
      73    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
      74         v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
      75  
      76    /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
      77       tm1 >= 0                                         (0  1 0  1 0)
      78       No carry comes out from {v1, kk1} +/- {vm1, kk1},
      79       and the division by two is exact.
      80       If (sa!=0) the sign of vm1 is negative */
      81    if (sa)
      82      {
      83  #ifdef HAVE_NATIVE_mpn_rsh1add_n
      84        mpn_rsh1add_n (vm1, v1, vm1, kk1);
      85  #else
      86        ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
      87        ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
      88  #endif
      89      }
      90    else
      91      {
      92  #ifdef HAVE_NATIVE_mpn_rsh1sub_n
      93        mpn_rsh1sub_n (vm1, v1, vm1, kk1);
      94  #else
      95        ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
      96        ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
      97  #endif
      98      }
      99  
     100    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
     101         v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
     102  
     103    /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
     104       t1 >= 0
     105    */
     106    vinf[0] -= mpn_sub_n (v1, v1, c, twok);
     107  
     108    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
     109         v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
     110  
     111    /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
     112       t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
     113    */
     114  #ifdef HAVE_NATIVE_mpn_rsh1sub_n
     115    mpn_rsh1sub_n (v2, v2, v1, kk1);
     116  #else
     117    ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
     118    ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
     119  #endif
     120  
     121    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
     122         v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
     123  
     124    /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
     125       result is v1 >= 0
     126    */
     127    ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
     128  
     129    /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
     130    cy = mpn_add_n (c1, c1, vm1, kk1);
     131    MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
     132    /* Memory allocated for vm1 is now free, it can be recycled ...*/
     133  
     134    /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
     135       result is v2 >= 0 */
     136    saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
     137    vinf[0] = vinf0;       /* Set the right value for vinf0                     */
     138  #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
     139    cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
     140  #else
     141    /* Overwrite unused vm1 */
     142    cy = mpn_lshift (vm1, vinf, twor, 1);
     143    cy += mpn_sub_n (v2, v2, vm1, twor);
     144  #endif
     145    MPN_DECR_U (v2 + twor, kk1 - twor, cy);
     146  
     147    /* Current matrix is
     148       [1 0 0 0 0; vinf
     149        0 1 0 0 0; v2
     150        1 0 1 0 0; v1
     151        0 1 0 1 0; vm1
     152        0 0 0 0 1] v0
     153       Some values already are in-place (we added vm1 in the correct position)
     154       | vinf|  v1 |  v0 |
     155  	      | vm1 |
     156       One still is in a separated area
     157  	| +v2 |
     158       We have to compute v1-=vinf; vm1 -= v2,
     159  	   |-vinf|
     160  	      | -v2 |
     161       Carefully reordering operations we can avoid to compute twice the sum
     162       of the high half of v2 plus the low half of vinf.
     163    */
     164  
     165    /* Add the high half of t2 in {vinf} */
     166    if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
     167      cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
     168      MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
     169    } else { /* triggered only by very unbalanced cases like
     170  	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
     171      ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
     172    }
     173    /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
     174       result is >= 0 */
     175    /* Side effect: we also subtracted (high half) vm1 -= v2 */
     176    cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
     177    vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
     178    vinf[0] = saved;
     179    MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
     180  
     181    /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
     182       Operate only on the low half.
     183    */
     184    cy = mpn_sub_n (c1, c1, v2, k);
     185    MPN_DECR_U (v1, kk1, cy);
     186  
     187    /********************* Beginning the final phase **********************/
     188  
     189    /* Most of the recomposition was done */
     190  
     191    /* add t2 in {c+3k, ...}, but only the low half */
     192    cy = mpn_add_n (c3, c3, v2, k);
     193    vinf[0] += cy;
     194    ASSERT(vinf[0] >= cy); /* No carry */
     195    MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
     196  
     197  #undef v0
     198  }