1  /* mpn_mul_n -- Multiply two natural numbers of length n.
       2  
       3  Copyright (C) 1991-2023 Free Software Foundation, Inc.
       4  
       5  This file is part of the GNU MP Library.
       6  
       7  The GNU MP Library is free software; you can redistribute it and/or modify
       8  it under the terms of the GNU Lesser General Public License as published by
       9  the Free Software Foundation; either version 2.1 of the License, or (at your
      10  option) any later version.
      11  
      12  The GNU MP Library is distributed in the hope that it will be useful, but
      13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      14  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      15  License for more details.
      16  
      17  You should have received a copy of the GNU Lesser General Public License
      18  along with the GNU MP Library; see the file COPYING.LIB.  If not, see
      19  <https://www.gnu.org/licenses/>.  */
      20  
      21  #include <gmp.h>
      22  #include "gmp-impl.h"
      23  
      24  /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
      25     both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
      26     always stored.  Return the most significant limb.
      27  
      28     Argument constraints:
      29     1. PRODP != UP and PRODP != VP, i.e. the destination
      30        must be distinct from the multiplier and the multiplicand.  */
      31  
      32  /* If KARATSUBA_THRESHOLD is not already defined, define it to a
      33     value which is good on most machines.  */
      34  #ifndef KARATSUBA_THRESHOLD
      35  #define KARATSUBA_THRESHOLD 32
      36  #endif
      37  
      38  /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
      39  #if KARATSUBA_THRESHOLD < 2
      40  #undef KARATSUBA_THRESHOLD
      41  #define KARATSUBA_THRESHOLD 2
      42  #endif
      43  
      44  /* Handle simple cases with traditional multiplication.
      45  
      46     This is the most critical code of multiplication.  All multiplies rely
      47     on this, both small and huge.  Small ones arrive here immediately.  Huge
      48     ones arrive here as this is the base case for Karatsuba's recursive
      49     algorithm below.  */
      50  
      51  void
      52  impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
      53  {
      54    mp_size_t i;
      55    mp_limb_t cy_limb;
      56    mp_limb_t v_limb;
      57  
      58    /* Multiply by the first limb in V separately, as the result can be
      59       stored (not added) to PROD.  We also avoid a loop for zeroing.  */
      60    v_limb = vp[0];
      61    if (v_limb <= 1)
      62      {
      63        if (v_limb == 1)
      64  	MPN_COPY (prodp, up, size);
      65        else
      66  	MPN_ZERO (prodp, size);
      67        cy_limb = 0;
      68      }
      69    else
      70      cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
      71  
      72    prodp[size] = cy_limb;
      73    prodp++;
      74  
      75    /* For each iteration in the outer loop, multiply one limb from
      76       U with one limb from V, and add it to PROD.  */
      77    for (i = 1; i < size; i++)
      78      {
      79        v_limb = vp[i];
      80        if (v_limb <= 1)
      81  	{
      82  	  cy_limb = 0;
      83  	  if (v_limb == 1)
      84  	    cy_limb = mpn_add_n (prodp, prodp, up, size);
      85  	}
      86        else
      87  	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
      88  
      89        prodp[size] = cy_limb;
      90        prodp++;
      91      }
      92  }
      93  
      94  void
      95  impn_mul_n (mp_ptr prodp,
      96  	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
      97  {
      98    if ((size & 1) != 0)
      99      {
     100        /* The size is odd, the code code below doesn't handle that.
     101  	 Multiply the least significant (size - 1) limbs with a recursive
     102  	 call, and handle the most significant limb of S1 and S2
     103  	 separately.  */
     104        /* A slightly faster way to do this would be to make the Karatsuba
     105  	 code below behave as if the size were even, and let it check for
     106  	 odd size in the end.  I.e., in essence move this code to the end.
     107  	 Doing so would save us a recursive call, and potentially make the
     108  	 stack grow a lot less.  */
     109  
     110        mp_size_t esize = size - 1;	/* even size */
     111        mp_limb_t cy_limb;
     112  
     113        MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
     114        cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
     115        prodp[esize + esize] = cy_limb;
     116        cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
     117  
     118        prodp[esize + size] = cy_limb;
     119      }
     120    else
     121      {
     122        /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
     123  
     124  	 Split U in two pieces, U1 and U0, such that
     125  	 U = U0 + U1*(B**n),
     126  	 and V in V1 and V0, such that
     127  	 V = V0 + V1*(B**n).
     128  
     129  	 UV is then computed recursively using the identity
     130  
     131  		2n   n          n                     n
     132  	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
     133  			1 1        1  0   0  1              0 0
     134  
     135  	 Where B = 2**BITS_PER_MP_LIMB.  */
     136  
     137        mp_size_t hsize = size >> 1;
     138        mp_limb_t cy;
     139        int negflg;
     140  
     141        /*** Product H.	 ________________  ________________
     142  			|_____U1 x V1____||____U0 x V0_____|  */
     143        /* Put result in upper part of PROD and pass low part of TSPACE
     144  	 as new TSPACE.  */
     145        MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
     146  
     147        /*** Product M.	 ________________
     148  			|_(U1-U0)(V0-V1)_|  */
     149        if (mpn_cmp (up + hsize, up, hsize) >= 0)
     150  	{
     151  	  mpn_sub_n (prodp, up + hsize, up, hsize);
     152  	  negflg = 0;
     153  	}
     154        else
     155  	{
     156  	  mpn_sub_n (prodp, up, up + hsize, hsize);
     157  	  negflg = 1;
     158  	}
     159        if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
     160  	{
     161  	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
     162  	  negflg ^= 1;
     163  	}
     164        else
     165  	{
     166  	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
     167  	  /* No change of NEGFLG.  */
     168  	}
     169        /* Read temporary operands from low part of PROD.
     170  	 Put result in low part of TSPACE using upper part of TSPACE
     171  	 as new TSPACE.  */
     172        MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
     173  
     174        /*** Add/copy product H.  */
     175        MPN_COPY (prodp + hsize, prodp + size, hsize);
     176        cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
     177  
     178        /*** Add product M (if NEGFLG M is a negative number).  */
     179        if (negflg)
     180  	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
     181        else
     182  	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
     183  
     184        /*** Product L.	 ________________  ________________
     185  			|________________||____U0 x V0_____|  */
     186        /* Read temporary operands from low part of PROD.
     187  	 Put result in low part of TSPACE using upper part of TSPACE
     188  	 as new TSPACE.  */
     189        MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
     190  
     191        /*** Add/copy Product L (twice).  */
     192  
     193        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
     194        if (cy)
     195  	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
     196  
     197        MPN_COPY (prodp, tspace, hsize);
     198        cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
     199        if (cy)
     200  	mpn_add_1 (prodp + size, prodp + size, size, 1);
     201      }
     202  }
     203  
     204  void
     205  impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
     206  {
     207    mp_size_t i;
     208    mp_limb_t cy_limb;
     209    mp_limb_t v_limb;
     210  
     211    /* Multiply by the first limb in V separately, as the result can be
     212       stored (not added) to PROD.  We also avoid a loop for zeroing.  */
     213    v_limb = up[0];
     214    if (v_limb <= 1)
     215      {
     216        if (v_limb == 1)
     217  	MPN_COPY (prodp, up, size);
     218        else
     219  	MPN_ZERO (prodp, size);
     220        cy_limb = 0;
     221      }
     222    else
     223      cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
     224  
     225    prodp[size] = cy_limb;
     226    prodp++;
     227  
     228    /* For each iteration in the outer loop, multiply one limb from
     229       U with one limb from V, and add it to PROD.  */
     230    for (i = 1; i < size; i++)
     231      {
     232        v_limb = up[i];
     233        if (v_limb <= 1)
     234  	{
     235  	  cy_limb = 0;
     236  	  if (v_limb == 1)
     237  	    cy_limb = mpn_add_n (prodp, prodp, up, size);
     238  	}
     239        else
     240  	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
     241  
     242        prodp[size] = cy_limb;
     243        prodp++;
     244      }
     245  }
     246  
     247  void
     248  impn_sqr_n (mp_ptr prodp,
     249  	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
     250  {
     251    if ((size & 1) != 0)
     252      {
     253        /* The size is odd, the code code below doesn't handle that.
     254  	 Multiply the least significant (size - 1) limbs with a recursive
     255  	 call, and handle the most significant limb of S1 and S2
     256  	 separately.  */
     257        /* A slightly faster way to do this would be to make the Karatsuba
     258  	 code below behave as if the size were even, and let it check for
     259  	 odd size in the end.  I.e., in essence move this code to the end.
     260  	 Doing so would save us a recursive call, and potentially make the
     261  	 stack grow a lot less.  */
     262  
     263        mp_size_t esize = size - 1;	/* even size */
     264        mp_limb_t cy_limb;
     265  
     266        MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
     267        cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
     268        prodp[esize + esize] = cy_limb;
     269        cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
     270  
     271        prodp[esize + size] = cy_limb;
     272      }
     273    else
     274      {
     275        mp_size_t hsize = size >> 1;
     276        mp_limb_t cy;
     277  
     278        /*** Product H.	 ________________  ________________
     279  			|_____U1 x U1____||____U0 x U0_____|  */
     280        /* Put result in upper part of PROD and pass low part of TSPACE
     281  	 as new TSPACE.  */
     282        MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
     283  
     284        /*** Product M.	 ________________
     285  			|_(U1-U0)(U0-U1)_|  */
     286        if (mpn_cmp (up + hsize, up, hsize) >= 0)
     287  	{
     288  	  mpn_sub_n (prodp, up + hsize, up, hsize);
     289  	}
     290        else
     291  	{
     292  	  mpn_sub_n (prodp, up, up + hsize, hsize);
     293  	}
     294  
     295        /* Read temporary operands from low part of PROD.
     296  	 Put result in low part of TSPACE using upper part of TSPACE
     297  	 as new TSPACE.  */
     298        MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
     299  
     300        /*** Add/copy product H.  */
     301        MPN_COPY (prodp + hsize, prodp + size, hsize);
     302        cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
     303  
     304        /*** Add product M (if NEGFLG M is a negative number).  */
     305        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
     306  
     307        /*** Product L.	 ________________  ________________
     308  			|________________||____U0 x U0_____|  */
     309        /* Read temporary operands from low part of PROD.
     310  	 Put result in low part of TSPACE using upper part of TSPACE
     311  	 as new TSPACE.  */
     312        MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
     313  
     314        /*** Add/copy Product L (twice).  */
     315  
     316        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
     317        if (cy)
     318  	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
     319  
     320        MPN_COPY (prodp, tspace, hsize);
     321        cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
     322        if (cy)
     323  	mpn_add_1 (prodp + size, prodp + size, size, 1);
     324      }
     325  }
     326  
     327  /* This should be made into an inline function in gmp.h.  */
     328  void
     329  mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
     330  {
     331    TMP_DECL (marker);
     332    TMP_MARK (marker);
     333    if (up == vp)
     334      {
     335        if (size < KARATSUBA_THRESHOLD)
     336  	{
     337  	  impn_sqr_n_basecase (prodp, up, size);
     338  	}
     339        else
     340  	{
     341  	  mp_ptr tspace;
     342  	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
     343  	  impn_sqr_n (prodp, up, size, tspace);
     344  	}
     345      }
     346    else
     347      {
     348        if (size < KARATSUBA_THRESHOLD)
     349  	{
     350  	  impn_mul_n_basecase (prodp, up, vp, size);
     351  	}
     352        else
     353  	{
     354  	  mp_ptr tspace;
     355  	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
     356  	  impn_mul_n (prodp, up, vp, size, tspace);
     357  	}
     358      }
     359    TMP_FREE (marker);
     360  }