(root)/
gmp-6.3.0/
tune/
speed-ext.c
       1  /* An example of extending the speed program to measure routines not in GMP.
       2  
       3  Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
       4  
       5  This file is part of the GNU MP Library.
       6  
       7  The GNU MP Library is free software; you can redistribute it and/or modify
       8  it under the terms of either:
       9  
      10    * the GNU Lesser General Public License as published by the Free
      11      Software Foundation; either version 3 of the License, or (at your
      12      option) any later version.
      13  
      14  or
      15  
      16    * the GNU General Public License as published by the Free Software
      17      Foundation; either version 2 of the License, or (at your option) any
      18      later version.
      19  
      20  or both in parallel, as here.
      21  
      22  The GNU MP Library is distributed in the hope that it will be useful, but
      23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      25  for more details.
      26  
      27  You should have received copies of the GNU General Public License and the
      28  GNU Lesser General Public License along with the GNU MP Library.  If not,
      29  see https://www.gnu.org/licenses/.  */
      30  
      31  
      32  /* The extension here is three versions of an mpn arithmetic mean.  These
      33     aren't meant to be particularly useful, just examples.
      34  
      35     You can run something like the following to compare their speeds.
      36  
      37             ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
      38  
      39     On RISC chips, mean_open() might be fastest if the compiler is doing a
      40     good job.  On the register starved x86s, mean_calls will be fastest.
      41  
      42  
      43     Notes:
      44  
      45     SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
      46     by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
      47     files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
      48  
      49     The advantage of this #include "speed.c" scheme is that there's no
      50     editing of a copy of that file, and new features in new versions of it
      51     will be immediately available.
      52  
      53     In a real program the routines mean_calls() etc would probably be in
      54     separate C or assembler source files, and just the measuring
      55     speed_mean_calls() etc would be here.  Linking against other libraries
      56     for things to measure is perfectly possible too.
      57  
      58     When attempting to compare two versions of the same named routine, say
      59     like the generic and assembler versions of mpn_add_n(), creative use of
      60     cc -D or #define is suggested, so one or both can be renamed and linked
      61     into the same program.  It'll be much easier to compare them side by side
      62     than with separate programs for each.
      63  
      64     common.c has notes on writing speed measuring routines.
      65  
      66     Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
      67     not using libtool) to get common.o and other objects needed by speed.c.  */
      68  
      69  
      70  #define SPEED_EXTRA_PROTOS                                              \
      71    double speed_mean_calls (struct speed_params *s);			\
      72    double speed_mean_open  (struct speed_params *s);			\
      73    double speed_mean_open2 (struct speed_params *s);
      74  
      75  #define SPEED_EXTRA_ROUTINES            \
      76    { "mean_calls",  speed_mean_calls  }, \
      77    { "mean_open",   speed_mean_open   }, \
      78    { "mean_open2",  speed_mean_open2  },
      79  
      80  #include "speed.c"
      81  
      82  
      83  /* A straightforward implementation calling mpn subroutines.
      84  
      85     wp,size is set to (xp,size + yp,size) / 2.  The return value is the
      86     remainder from the division.  The other versions are the same.  */
      87  
      88  mp_limb_t
      89  mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
      90  {
      91    mp_limb_t  c, ret;
      92  
      93    ASSERT (size >= 1);
      94  
      95    c = mpn_add_n (wp, xp, yp, size);
      96    ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
      97    wp[size-1] |= (c << (GMP_LIMB_BITS-1));
      98    return ret;
      99  }
     100  
     101  
     102  /* An open-coded version, making one pass over the data.  The right shift is
     103     done as the added limbs are produced.  The addition code follows
     104     mpn/generic/add_n.c. */
     105  
     106  mp_limb_t
     107  mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
     108  {
     109    mp_limb_t  w, wprev, x, y, c, ret;
     110    mp_size_t  i;
     111  
     112    ASSERT (size >= 1);
     113  
     114    x = xp[0];
     115    y = yp[0];
     116  
     117    wprev = x + y;
     118    c = (wprev < x);
     119    ret = (wprev & 1);
     120  
     121  #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
     122  
     123    for (i = 1; i < size; i++)
     124      {
     125        x = xp[i];
     126        y = yp[i];
     127  
     128        w = x + c;
     129        c = (w < x);
     130        w += y;
     131        c += (w < y);
     132  
     133        wp[i-1] = RSHIFT (w, wprev);
     134        wprev = w;
     135      }
     136  
     137    wp[i-1] = RSHIFT (c, wprev);
     138  
     139    return ret;
     140  }
     141  
     142  
     143  /* Another one-pass version, but right shifting the source limbs rather than
     144     the result limbs.  There's not much chance of this being better than the
     145     above, but it's an alternative at least. */
     146  
     147  mp_limb_t
     148  mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
     149  {
     150    mp_limb_t  w, x, y, xnext, ynext, c, ret;
     151    mp_size_t  i;
     152  
     153    ASSERT (size >= 1);
     154  
     155    x = xp[0];
     156    y = yp[0];
     157  
     158    /* ret is the low bit of x+y, c is the carry out of that low bit add */
     159    ret = (x ^ y) & 1;
     160    c   = (x & y) & 1;
     161  
     162    for (i = 0; i < size-1; i++)
     163      {
     164        xnext = xp[i+1];
     165        ynext = yp[i+1];
     166        x = RSHIFT (xnext, x);
     167        y = RSHIFT (ynext, y);
     168  
     169        w = x + c;
     170        c = (w < x);
     171        w += y;
     172        c += (w < y);
     173        wp[i] = w;
     174  
     175        x = xnext;
     176        y = ynext;
     177      }
     178  
     179    wp[i] = (x >> 1) + (y >> 1) + c;
     180  
     181    return ret;
     182  }
     183  
     184  
     185  /* The speed measuring routines are the same apart from which function they
     186     run, so a macro is used.  Actually this macro is the same as
     187     SPEED_ROUTINE_MPN_BINARY_N.  */
     188  
     189  #define SPEED_ROUTINE_MEAN(mean_fun)                    \
     190    {                                                     \
     191      unsigned  i;                                        \
     192      mp_ptr    wp;                                       \
     193      double    t;                                        \
     194      TMP_DECL;                                  \
     195                                                          \
     196      SPEED_RESTRICT_COND (s->size >= 1);                 \
     197                                                          \
     198      TMP_MARK;                                  \
     199      SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
     200                                                          \
     201      speed_operand_src (s, s->xp, s->size);              \
     202      speed_operand_src (s, s->yp, s->size);              \
     203      speed_operand_dst (s, wp, s->size);                 \
     204      speed_cache_fill (s);                               \
     205                                                          \
     206      speed_starttime ();                                 \
     207      i = s->reps;                                        \
     208      do                                                  \
     209        mean_fun (wp, s->xp, s->yp, s->size);             \
     210      while (--i != 0);                                   \
     211      t = speed_endtime ();                               \
     212                                                          \
     213      TMP_FREE;                                  \
     214      return t;                                           \
     215    }
     216  
     217  double
     218  speed_mean_calls (struct speed_params *s)
     219  {
     220    SPEED_ROUTINE_MEAN (mean_calls);
     221  }
     222  
     223  double
     224  speed_mean_open (struct speed_params *s)
     225  {
     226    SPEED_ROUTINE_MEAN (mean_open);
     227  }
     228  
     229  double
     230  speed_mean_open2 (struct speed_params *s)
     231  {
     232    SPEED_ROUTINE_MEAN (mean_open2);
     233  }