(root)/
gmp-6.3.0/
mpn/
generic/
mulmid.c
       1  /* mpn_mulmid -- middle product
       2  
       3     Contributed by David Harvey.
       4  
       5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
       6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
       7     GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
       8  
       9  Copyright 2011 Free Software Foundation, Inc.
      10  
      11  This file is part of the GNU MP Library.
      12  
      13  The GNU MP Library is free software; you can redistribute it and/or modify
      14  it under the terms of either:
      15  
      16    * the GNU Lesser General Public License as published by the Free
      17      Software Foundation; either version 3 of the License, or (at your
      18      option) any later version.
      19  
      20  or
      21  
      22    * the GNU General Public License as published by the Free Software
      23      Foundation; either version 2 of the License, or (at your option) any
      24      later version.
      25  
      26  or both in parallel, as here.
      27  
      28  The GNU MP Library is distributed in the hope that it will be useful, but
      29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      31  for more details.
      32  
      33  You should have received copies of the GNU General Public License and the
      34  GNU Lesser General Public License along with the GNU MP Library.  If not,
      35  see https://www.gnu.org/licenses/.  */
      36  
      37  
      38  #include "gmp-impl.h"
      39  
      40  
      41  #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
      42  
      43  
      44  void
      45  mpn_mulmid (mp_ptr rp,
      46              mp_srcptr ap, mp_size_t an,
      47              mp_srcptr bp, mp_size_t bn)
      48  {
      49    mp_size_t rn, k;
      50    mp_ptr scratch, temp;
      51  
      52    ASSERT (an >= bn);
      53    ASSERT (bn >= 1);
      54    ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
      55    ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
      56  
      57    if (bn < MULMID_TOOM42_THRESHOLD)
      58      {
      59        /* region not tall enough to make toom42 worthwhile for any portion */
      60  
      61        if (an < CHUNK)
      62  	{
      63  	  /* region not too wide either, just call basecase directly */
      64  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
      65  	  return;
      66  	}
      67  
      68        /* Region quite wide. For better locality, use basecase on chunks:
      69  
      70  	 AAABBBCC..
      71  	 .AAABBBCC.
      72  	 ..AAABBBCC
      73        */
      74  
      75        k = CHUNK - bn + 1;    /* number of diagonals per chunk */
      76  
      77        /* first chunk (marked A in the above diagram) */
      78        mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
      79  
      80        /* remaining chunks (B, C, etc) */
      81        an -= k;
      82  
      83        while (an >= CHUNK)
      84  	{
      85  	  mp_limb_t t0, t1, cy;
      86  	  ap += k, rp += k;
      87  	  t0 = rp[0], t1 = rp[1];
      88  	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
      89  	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
      90  	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
      91  	  an -= k;
      92  	}
      93  
      94        if (an >= bn)
      95  	{
      96  	  /* last remaining chunk */
      97  	  mp_limb_t t0, t1, cy;
      98  	  ap += k, rp += k;
      99  	  t0 = rp[0], t1 = rp[1];
     100  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
     101  	  ADDC_LIMB (cy, rp[0], rp[0], t0);
     102  	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
     103  	}
     104  
     105        return;
     106      }
     107  
     108    /* region is tall enough for toom42 */
     109  
     110    rn = an - bn + 1;
     111  
     112    if (rn < MULMID_TOOM42_THRESHOLD)
     113      {
     114        /* region not wide enough to make toom42 worthwhile for any portion */
     115  
     116        TMP_DECL;
     117  
     118        if (bn < CHUNK)
     119  	{
     120  	  /* region not too tall either, just call basecase directly */
     121  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
     122  	  return;
     123  	}
     124  
     125        /* Region quite tall. For better locality, use basecase on chunks:
     126  
     127  	 AAAAA....
     128  	 .AAAAA...
     129  	 ..BBBBB..
     130  	 ...BBBBB.
     131  	 ....CCCCC
     132        */
     133  
     134        TMP_MARK;
     135  
     136        temp = TMP_ALLOC_LIMBS (rn + 2);
     137  
     138        /* first chunk (marked A in the above diagram) */
     139        bp += bn - CHUNK, an -= bn - CHUNK;
     140        mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
     141  
     142        /* remaining chunks (B, C, etc) */
     143        bn -= CHUNK;
     144  
     145        while (bn >= CHUNK)
     146  	{
     147  	  ap += CHUNK, bp -= CHUNK;
     148  	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
     149  	  mpn_add_n (rp, rp, temp, rn + 2);
     150  	  bn -= CHUNK;
     151  	}
     152  
     153        if (bn)
     154  	{
     155  	  /* last remaining chunk */
     156  	  ap += CHUNK, bp -= bn;
     157  	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
     158  	  mpn_add_n (rp, rp, temp, rn + 2);
     159  	}
     160  
     161        TMP_FREE;
     162        return;
     163      }
     164  
     165    /* we're definitely going to use toom42 somewhere */
     166  
     167    if (bn > rn)
     168      {
     169        /* slice region into chunks, use toom42 on all chunks except possibly
     170  	 the last:
     171  
     172           AA....
     173           .AA...
     174           ..BB..
     175           ...BB.
     176           ....CC
     177        */
     178  
     179        TMP_DECL;
     180        TMP_MARK;
     181  
     182        temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
     183        scratch = temp + rn + 2;
     184  
     185        /* first chunk (marked A in the above diagram) */
     186        bp += bn - rn;
     187        mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
     188  
     189        /* remaining chunks (B, C, etc) */
     190        bn -= rn;
     191  
     192        while (bn >= rn)
     193          {
     194            ap += rn, bp -= rn;
     195  	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
     196            mpn_add_n (rp, rp, temp, rn + 2);
     197            bn -= rn;
     198          }
     199  
     200        if (bn)
     201          {
     202            /* last remaining chunk */
     203            ap += rn, bp -= bn;
     204  	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
     205            mpn_add_n (rp, rp, temp, rn + 2);
     206          }
     207  
     208        TMP_FREE;
     209      }
     210    else
     211      {
     212        /* slice region into chunks, use toom42 on all chunks except possibly
     213  	 the last:
     214  
     215           AAABBBCC..
     216           .AAABBBCC.
     217           ..AAABBBCC
     218        */
     219  
     220        TMP_DECL;
     221        TMP_MARK;
     222  
     223        scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
     224  
     225        /* first chunk (marked A in the above diagram) */
     226        mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
     227  
     228        /* remaining chunks (B, C, etc) */
     229        rn -= bn;
     230  
     231        while (rn >= bn)
     232          {
     233  	  mp_limb_t t0, t1, cy;
     234            ap += bn, rp += bn;
     235            t0 = rp[0], t1 = rp[1];
     236            mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
     237  	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
     238  	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
     239  	  rn -= bn;
     240          }
     241  
     242        TMP_FREE;
     243  
     244        if (rn)
     245          {
     246            /* last remaining chunk */
     247  	  mp_limb_t t0, t1, cy;
     248            ap += bn, rp += bn;
     249            t0 = rp[0], t1 = rp[1];
     250            mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
     251  	  ADDC_LIMB (cy, rp[0], rp[0], t0);
     252  	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
     253          }
     254      }
     255  }