(root)/
mpfr-4.2.1/
tune/
bidimensional_sample.c
       1  /* [Description]
       2  
       3  Copyright 2010-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #include <stdlib.h>
      24  #include <time.h>
      25  
      26  #define MPFR_NEED_LONGLONG_H
      27  #include "mpfr-impl.h"
      28  
      29  #undef _PROTO
      30  #define _PROTO __GMP_PROTO
      31  #include "speed.h"
      32  
      33  /* Let f be a function for which we have several implementations f1, f2... */
      34  /* We wish to have a quick overview of which implementation is the best    */
      35  /* in function of the point x where f(x) is computed and of the precision */
      36  /* prec requested by the user.                                             */
      37  /* This is performed by drawing a 2D graphic with color indicating which   */
      38  /* method is the best.                                                     */
      39  /* For building this graphic, the following structur is used (see the core */
      40  /* of generate_2D_sample for an explanation of each field.                 */
      41  struct speed_params2D
      42  {
      43    /* x-window: [min_x, max_x] or [2^min_x, 2^max_x]                        */
      44    /*           or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x]            */
      45    /* depending on the value of logarithmic_scale_x                         */
      46    double min_x;
      47    double max_x;
      48  
      49    /* prec-window: [min_prec, max_prec] */
      50    mpfr_prec_t min_prec;
      51    mpfr_prec_t max_prec;
      52  
      53    /* number of sample points for the x-axis and the prec-axis */
      54    int nb_points_x;
      55    int nb_points_prec;
      56  
      57    /* should the sample points be logarithmically scaled or not */
      58    int logarithmic_scale_x;
      59    int logarithmic_scale_prec;
      60  
      61    /* list of functions g1, g2... measuring the execution time of f1, f2...  */
      62    /* when given a point x and a precision prec stored in s.                 */
      63    /* We use s->xp to store the significant of x, s->r to store its exponent */
      64    /* s->align_xp to store its sign, and s->size to store prec.              */
      65    double (**speed_funcs) (struct speed_params *s);
      66  };
      67  
      68  /* Given an array t of nb_functions double indicating the timings of several */
      69  /* implementations, return i, such that t[i] is the best timing.             */
      70  int
      71  find_best (double *t, int nb_functions)
      72  {
      73    int i, ibest;
      74    double best;
      75  
      76    if (nb_functions<=0)
      77      {
      78        fprintf (stderr, "There is no function\n");
      79        abort ();
      80      }
      81  
      82    ibest = 0;
      83    best = t[0];
      84    for (i=1; i<nb_functions; i++)
      85      {
      86        if (t[i]<best)
      87          {
      88            best = t[i];
      89            ibest = i;
      90          }
      91      }
      92  
      93    return ibest;
      94  }
      95  
      96  void generate_2D_sample (FILE *output, struct speed_params2D param)
      97  {
      98    mpfr_t temp;
      99    double incr_prec;
     100    mpfr_t incr_x;
     101    mpfr_t x, x2;
     102    double prec;
     103    struct speed_params s;
     104    int i;
     105    int test;
     106    int nb_functions;
     107    double *t; /* store the timing of each implementation */
     108  
     109    /* We first determine how many implementations we have */
     110    nb_functions = 0;
     111    while (param.speed_funcs[nb_functions] != NULL)
     112      nb_functions++;
     113  
     114    t = malloc (nb_functions * sizeof (double));
     115    if (t == NULL)
     116      {
     117        fprintf (stderr, "Can't allocate memory.\n");
     118        abort ();
     119      }
     120  
     121  
     122    mpfr_init2 (temp, MPFR_SMALL_PRECISION);
     123  
     124    /* The precision is sampled from min_prec to max_prec with        */
     125    /* approximately nb_points_prec points. If logarithmic_scale_prec */
     126    /* is true, the precision is multiplied by incr_prec at each      */
     127    /* step. Otherwise, incr_prec is added at each step.              */
     128    if (param.logarithmic_scale_prec)
     129      {
     130        mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
     131        mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
     132        mpfr_root (temp, temp,
     133                   (unsigned long int)param.nb_points_prec, MPFR_RNDU);
     134        incr_prec = mpfr_get_d (temp, MPFR_RNDU);
     135      }
     136    else
     137      {
     138        incr_prec = (double)param.max_prec - (double)param.min_prec;
     139        incr_prec = incr_prec/((double)param.nb_points_prec);
     140      }
     141  
     142    /* The points x are sampled according to the following rule:             */
     143    /* If logarithmic_scale_x = 0:                                           */
     144    /*    nb_points_x points are equally distributed between min_x and max_x */
     145    /* If logarithmic_scale_x = 1:                                           */
     146    /*    nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At     */
     147    /*    each step, the current point is multiplied by incr_x.              */
     148    /* If logarithmic_scale_x = -1:                                          */
     149    /*    nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x)     */
     150    /*    (at each step, the current point is divided by incr_x);  and       */
     151    /*    nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x)       */
     152    /*    (at each step, the current point is multiplied by incr_x).         */
     153    mpfr_init2 (incr_x, param.max_prec);
     154    if (param.logarithmic_scale_x == 0)
     155      {
     156        mpfr_set_d (incr_x,
     157                    (param.max_x - param.min_x)/(double)param.nb_points_x,
     158                    MPFR_RNDU);
     159      }
     160    else if (param.logarithmic_scale_x == -1)
     161      {
     162        mpfr_set_d (incr_x,
     163                    2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
     164                    MPFR_RNDU);
     165        mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
     166      }
     167    else
     168      { /* other values of param.logarithmic_scale_x are considered as 1 */
     169        mpfr_set_d (incr_x,
     170                    (param.max_x - param.min_x)/(double)param.nb_points_x,
     171                    MPFR_RNDU);
     172        mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
     173      }
     174  
     175    /* Main loop */
     176    mpfr_init2 (x, param.max_prec);
     177    mpfr_init2 (x2, param.max_prec);
     178    prec = (double)param.min_prec;
     179    while (prec <= param.max_prec)
     180      {
     181        printf ("prec = %d\n", (int)prec);
     182        if (param.logarithmic_scale_x == 0)
     183          mpfr_set_d (temp, param.min_x, MPFR_RNDU);
     184        else if (param.logarithmic_scale_x == -1)
     185          {
     186            mpfr_set_d (temp, param.max_x, MPFR_RNDD);
     187            mpfr_exp2 (temp, temp, MPFR_RNDD);
     188            mpfr_neg (temp, temp, MPFR_RNDU);
     189          }
     190        else
     191          {
     192            mpfr_set_d (temp, param.min_x, MPFR_RNDD);
     193            mpfr_exp2 (temp, temp, MPFR_RNDD);
     194          }
     195  
     196        /* We perturb x a little bit, in order to avoid trailing zeros that */
     197        /* might change the behavior of algorithms.                         */
     198        mpfr_const_pi (x, MPFR_RNDN);
     199        mpfr_div_2ui (x, x, 7, MPFR_RNDN);
     200        mpfr_add_ui (x, x, 1, MPFR_RNDN);
     201        mpfr_mul (x, x, temp, MPFR_RNDN);
     202  
     203        test = 1;
     204        while (test)
     205          {
     206            mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
     207            mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
     208  
     209            s.r = (mp_limb_t)mpfr_get_exp (x);
     210            s.size = (mpfr_prec_t)prec;
     211            s.align_xp = (mpfr_sgn (x) > 0)?1:2;
     212            mpfr_set_prec (x2, (mpfr_prec_t)prec);
     213            mpfr_set (x2, x, MPFR_RNDU);
     214            s.xp = x2->_mpfr_d;
     215  
     216            for (i=0; i<nb_functions; i++)
     217              {
     218                t[i] = speed_measure (param.speed_funcs[i], &s);
     219                mpfr_fprintf (output, "%e\t", t[i]);
     220              }
     221            fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
     222  
     223            if (param.logarithmic_scale_x == 0)
     224              {
     225                mpfr_add (x, x, incr_x, MPFR_RNDU);
     226                if (mpfr_cmp_d (x, param.max_x) > 0)
     227                  test=0;
     228              }
     229            else
     230              {
     231                if (mpfr_sgn (x) < 0 )
     232                  { /* if x<0, it means that logarithmic_scale_x=-1 */
     233                    mpfr_div (x, x, incr_x, MPFR_RNDU);
     234                    mpfr_abs (temp, x, MPFR_RNDD);
     235                    mpfr_log2 (temp, temp, MPFR_RNDD);
     236                    if (mpfr_cmp_d (temp, param.min_x) < 0)
     237                      mpfr_neg (x, x, MPFR_RNDN);
     238                  }
     239                else
     240                  {
     241                    mpfr_mul (x, x, incr_x, MPFR_RNDU);
     242                    mpfr_set (temp, x, MPFR_RNDD);
     243                    mpfr_log2 (temp, temp, MPFR_RNDD);
     244                    if (mpfr_cmp_d (temp, param.max_x) > 0)
     245                      test=0;
     246                  }
     247              }
     248          }
     249  
     250        prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
     251                 : (prec + incr_prec) );
     252        fprintf (output, "\n");
     253      }
     254  
     255    free (t);
     256    mpfr_clear (incr_x);
     257    mpfr_clear (x);
     258    mpfr_clear (x2);
     259    mpfr_clear (temp);
     260  
     261    return;
     262  }
     263  
     264  #define SPEED_MPFR_FUNC_2D(mean_func)                   \
     265    do                                                    \
     266      {                                                   \
     267        double t;                                         \
     268        unsigned i;                                       \
     269        mpfr_t w, x;                                      \
     270        mp_size_t size;                                   \
     271                                                          \
     272        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     273        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     274                                                          \
     275        size = (s->size-1)/GMP_NUMB_BITS+1;               \
     276        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     277        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     278        MPFR_SET_EXP (x, (mpfr_exp_t) s->r);              \
     279        if (s->align_xp == 2) MPFR_SET_NEG (x);           \
     280                                                          \
     281        mpfr_init2 (w, s->size);                          \
     282        speed_starttime ();                               \
     283        i = s->reps;                                      \
     284                                                          \
     285        do                                                \
     286          mean_func (w, x, MPFR_RNDN);                    \
     287        while (--i != 0);                                 \
     288        t = speed_endtime ();                             \
     289                                                          \
     290        mpfr_clear (w);                                   \
     291        return t;                                         \
     292      }                                                   \
     293    while (0)
     294  
     295  mpfr_prec_t mpfr_exp_2_threshold;
     296  mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
     297  #undef  MPFR_EXP_2_THRESHOLD
     298  #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
     299  #include "exp_2.c"
     300  
     301  double
     302  timing_exp1 (struct speed_params *s)
     303  {
     304    mpfr_exp_2_threshold = s->size+1;
     305    SPEED_MPFR_FUNC_2D (mpfr_exp_2);
     306  }
     307  
     308  double
     309  timing_exp2 (struct speed_params *s)
     310  {
     311    mpfr_exp_2_threshold = s->size-1;
     312    SPEED_MPFR_FUNC_2D (mpfr_exp_2);
     313  }
     314  
     315  double
     316  timing_exp3 (struct speed_params *s)
     317  {
     318    SPEED_MPFR_FUNC_2D (mpfr_exp_3);
     319  }
     320  
     321  
     322  #include "ai.c"
     323  double
     324  timing_ai1 (struct speed_params *s)
     325  {
     326    SPEED_MPFR_FUNC_2D (mpfr_ai1);
     327  }
     328  
     329  double
     330  timing_ai2 (struct speed_params *s)
     331  {
     332    SPEED_MPFR_FUNC_2D (mpfr_ai2);
     333  }
     334  
     335  /* These functions are for testing purpose only */
     336  /* They are used to draw which method is actually used */
     337  double
     338  virtual_timing_ai1 (struct speed_params *s)
     339  {
     340    double t;
     341    unsigned i;
     342    mpfr_t w, x;
     343    mp_size_t size;
     344    mpfr_t temp1, temp2;
     345  
     346    SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
     347    SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
     348  
     349    size = (s->size-1)/GMP_NUMB_BITS+1;
     350    s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
     351    MPFR_TMP_INIT1 (s->xp, x, s->size);
     352    MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
     353    if (s->align_xp == 2) MPFR_SET_NEG (x);
     354  
     355    mpfr_init2 (w, s->size);
     356    speed_starttime ();
     357    i = s->reps;
     358  
     359    mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
     360    mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
     361  
     362    mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
     363    mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
     364    mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
     365  
     366    if (MPFR_IS_NEG (x))
     367        mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
     368    else
     369        mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
     370  
     371    mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
     372  
     373    if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
     374      t = 1000.;
     375    else
     376      t = 1.;
     377  
     378    mpfr_clear (temp1);
     379    mpfr_clear (temp2);
     380  
     381    return t;
     382  }
     383  
     384  double
     385  virtual_timing_ai2 (struct speed_params *s)
     386  {
     387    double t;
     388    unsigned i;
     389    mpfr_t w, x;
     390    mp_size_t size;
     391    mpfr_t temp1, temp2;
     392  
     393    SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
     394    SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
     395  
     396    size = (s->size-1)/GMP_NUMB_BITS+1;
     397    s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
     398    MPFR_TMP_INIT1 (s->xp, x, s->size);
     399    MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
     400    if (s->align_xp == 2) MPFR_SET_NEG (x);
     401  
     402    mpfr_init2 (w, s->size);
     403    speed_starttime ();
     404    i = s->reps;
     405  
     406    mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
     407    mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
     408  
     409    mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
     410    mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
     411    mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
     412  
     413    if (MPFR_IS_NEG (x))
     414        mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
     415    else
     416        mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
     417  
     418    mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
     419  
     420    if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
     421      t = 1.;
     422    else
     423      t = 1000.;
     424  
     425    mpfr_clear (temp1);
     426    mpfr_clear (temp2);
     427  
     428    return t;
     429  }
     430  
     431  int
     432  main (void)
     433  {
     434    FILE *output;
     435    struct speed_params2D param;
     436    double (*speed_funcs[3]) (struct speed_params *s);
     437  
     438    /* char filename[256] = "virtual_timing_ai.dat"; */
     439    /* speed_funcs[0] = virtual_timing_ai1; */
     440    /* speed_funcs[1] = virtual_timing_ai2; */
     441  
     442    char filename[256] = "airy.dat";
     443    speed_funcs[0] = timing_ai1;
     444    speed_funcs[1] = timing_ai2;
     445  
     446    speed_funcs[2] = NULL;
     447    output = fopen (filename, "w");
     448    if (output == NULL)
     449      {
     450        fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
     451        abort ();
     452      }
     453    param.min_x = -80;
     454    param.max_x = 60;
     455    param.min_prec = 50;
     456    param.max_prec = 1500;
     457    param.nb_points_x = 200;
     458    param.nb_points_prec = 200;
     459    param.logarithmic_scale_x  = 0;
     460    param.logarithmic_scale_prec = 0;
     461    param.speed_funcs = speed_funcs;
     462  
     463    generate_2D_sample (output, param);
     464  
     465    fclose (output);
     466    mpfr_free_cache ();
     467    return 0;
     468  }