(root)/
mpfr-4.2.1/
tune/
tuneup.c
       1  /* Tune various threshold of MPFR
       2  
       3  Copyright 2005-2023 Free Software Foundation, Inc.
       4  Contributed by the AriC and Caramba projects, INRIA.
       5  
       6  This file is part of the GNU MPFR Library.
       7  
       8  The GNU MPFR Library is free software; you can redistribute it and/or modify
       9  it under the terms of the GNU Lesser General Public License as published by
      10  the Free Software Foundation; either version 3 of the License, or (at your
      11  option) any later version.
      12  
      13  The GNU MPFR Library is distributed in the hope that it will be useful, but
      14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
      16  License for more details.
      17  
      18  You should have received a copy of the GNU Lesser General Public License
      19  along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
      20  https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
      21  51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
      22  
      23  #include <stdlib.h>
      24  #include <time.h>
      25  
      26  #define MPFR_NEED_LONGLONG_H
      27  #include "mpfr-impl.h"
      28  
      29  #undef _PROTO
      30  #define _PROTO __GMP_PROTO
      31  #include "speed.h"
      32  
      33  /* Undefine static assertion system */
      34  #undef MPFR_STAT_STATIC_ASSERT
      35  #define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a)
      36  
      37  int verbose;
      38  
      39  /* template for an unary function */
      40  /* s->size: precision of both input and output
      41     s->xp  : Mantissa of first input
      42     s->yp  : mantissa of second input                    */
      43  #define SPEED_MPFR_FUNC(mean_fun)                       \
      44    do                                                    \
      45      {                                                   \
      46        unsigned  i;                                      \
      47        mpfr_limb_ptr wp;                                 \
      48        double    t;                                      \
      49        mpfr_t    w, x;                                   \
      50        mp_size_t size;                                   \
      51        MPFR_TMP_DECL (marker);                           \
      52                                                          \
      53        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
      54        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
      55        MPFR_TMP_MARK (marker);                           \
      56                                                          \
      57        size = (s->size-1)/GMP_NUMB_BITS+1;               \
      58        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
      59        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
      60        MPFR_SET_EXP (x, 0);                              \
      61                                                          \
      62        MPFR_TMP_INIT (wp, w, s->size, size);             \
      63                                                          \
      64        speed_operand_src (s, s->xp, size);               \
      65        speed_operand_dst (s, wp, size);                  \
      66        speed_cache_fill (s);                             \
      67                                                          \
      68        speed_starttime ();                               \
      69        i = s->reps;                                      \
      70        do                                                \
      71          mean_fun (w, x, MPFR_RNDN);                     \
      72        while (--i != 0);                                 \
      73        t = speed_endtime ();                             \
      74                                                          \
      75        MPFR_TMP_FREE (marker);                           \
      76        return t;                                         \
      77      }                                                   \
      78    while (0)
      79  
      80  /* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */
      81  #define SPEED_MPFR_FUNC2(mean_fun)                      \
      82    do                                                    \
      83      {                                                   \
      84        unsigned  i;                                      \
      85        mpfr_limb_ptr vp, wp;                             \
      86        double    t;                                      \
      87        mpfr_t    v, w, x;                                \
      88        mp_size_t size;                                   \
      89        MPFR_TMP_DECL (marker);                           \
      90                                                          \
      91        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
      92        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
      93        MPFR_TMP_MARK (marker);                           \
      94                                                          \
      95        size = (s->size-1)/GMP_NUMB_BITS+1;               \
      96        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
      97        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
      98        MPFR_SET_EXP (x, 0);                              \
      99                                                          \
     100        MPFR_TMP_INIT (vp, v, s->size, size);             \
     101        MPFR_TMP_INIT (wp, w, s->size, size);             \
     102                                                          \
     103        speed_operand_src (s, s->xp, size);               \
     104        speed_operand_dst (s, vp, size);                  \
     105        speed_operand_dst (s, wp, size);                  \
     106        speed_cache_fill (s);                             \
     107                                                          \
     108        speed_starttime ();                               \
     109        i = s->reps;                                      \
     110        do                                                \
     111          mean_fun (v, w, x, MPFR_RNDN);                  \
     112        while (--i != 0);                                 \
     113        t = speed_endtime ();                             \
     114                                                          \
     115        MPFR_TMP_FREE (marker);                           \
     116        return t;                                         \
     117      }                                                   \
     118    while (0)
     119  
     120  /* template for a function like mpfr_mul */
     121  #define SPEED_MPFR_OP(mean_fun)                         \
     122    do                                                    \
     123      {                                                   \
     124        unsigned  i;                                      \
     125        mpfr_limb_ptr wp;                                 \
     126        double    t;                                      \
     127        mpfr_t    w, x, y;                                \
     128        mp_size_t size;                                   \
     129        MPFR_TMP_DECL (marker);                           \
     130                                                          \
     131        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     132        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     133        MPFR_TMP_MARK (marker);                           \
     134                                                          \
     135        size = (s->size-1)/GMP_NUMB_BITS+1;               \
     136        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     137        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     138        MPFR_SET_EXP (x, 0);                              \
     139        s->yp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     140        MPFR_TMP_INIT1 (s->yp, y, s->size);               \
     141        MPFR_SET_EXP (y, 0);                              \
     142                                                          \
     143        MPFR_TMP_INIT (wp, w, s->size, size);             \
     144                                                          \
     145        speed_operand_src (s, s->xp, size);               \
     146        speed_operand_src (s, s->yp, size);               \
     147        speed_operand_dst (s, wp, size);                  \
     148        speed_cache_fill (s);                             \
     149                                                          \
     150        speed_starttime ();                               \
     151        i = s->reps;                                      \
     152        do                                                \
     153          mean_fun (w, x, y, MPFR_RNDN);                  \
     154        while (--i != 0);                                 \
     155        t = speed_endtime ();                             \
     156                                                          \
     157        MPFR_TMP_FREE (marker);                           \
     158        return t;                                         \
     159      }                                                   \
     160    while (0)
     161  
     162  /* special template for mpfr_mul(a,b,b) */
     163  #define SPEED_MPFR_SQR(mean_fun)                        \
     164    do                                                    \
     165      {                                                   \
     166        unsigned  i;                                      \
     167        mpfr_limb_ptr wp;                                 \
     168        double    t;                                      \
     169        mpfr_t    w, x;                                   \
     170        mp_size_t size;                                   \
     171        MPFR_TMP_DECL (marker);                           \
     172                                                          \
     173        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     174        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     175        MPFR_TMP_MARK (marker);                           \
     176                                                          \
     177        size = (s->size-1)/GMP_NUMB_BITS+1;               \
     178        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     179        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     180        MPFR_SET_EXP (x, 0);                              \
     181                                                          \
     182        MPFR_TMP_INIT (wp, w, s->size, size);             \
     183                                                          \
     184        speed_operand_src (s, s->xp, size);               \
     185        speed_operand_dst (s, wp, size);                  \
     186        speed_cache_fill (s);                             \
     187                                                          \
     188        speed_starttime ();                               \
     189        i = s->reps;                                      \
     190        do                                                \
     191          mean_fun (w, x, x, MPFR_RNDN);                  \
     192        while (--i != 0);                                 \
     193        t = speed_endtime ();                             \
     194                                                          \
     195        MPFR_TMP_FREE (marker);                           \
     196        return t;                                         \
     197      }                                                   \
     198    while (0)
     199  
     200  /* s->size: precision of both input and output
     201     s->xp  : Mantissa of first input
     202     s->r   : exponent
     203     s->align_xp : sign (1 means positive, 2 means negative)
     204  */
     205  #define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun)         \
     206    do                                                    \
     207      {                                                   \
     208        unsigned  i;                                      \
     209        mpfr_limb_ptr wp;                                 \
     210        double    t;                                      \
     211        mpfr_t    w, x;                                   \
     212        mp_size_t size;                                   \
     213        MPFR_TMP_DECL (marker);                           \
     214                                                          \
     215        SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
     216        SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
     217        MPFR_TMP_MARK (marker);                           \
     218                                                          \
     219        size = (s->size-1)/GMP_NUMB_BITS+1;               \
     220        s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
     221        MPFR_TMP_INIT1 (s->xp, x, s->size);               \
     222        MPFR_SET_EXP (x, s->r);                           \
     223        if (s->align_xp == 2) MPFR_SET_NEG (x);           \
     224                                                          \
     225        MPFR_TMP_INIT (wp, w, s->size, size);             \
     226                                                          \
     227        speed_operand_src (s, s->xp, size);               \
     228        speed_operand_dst (s, wp, size);                  \
     229        speed_cache_fill (s);                             \
     230                                                          \
     231        speed_starttime ();                               \
     232        i = s->reps;                                      \
     233        do                                                \
     234          mean_fun (w, x, MPFR_RNDN);                     \
     235        while (--i != 0);                                 \
     236        t = speed_endtime ();                             \
     237                                                          \
     238        MPFR_TMP_FREE (marker);                           \
     239        return t;                                         \
     240      }                                                   \
     241    while (0)
     242  
     243  /* First we include all the functions we want to tune inside this program.
     244     We can't use the GNU MPFR library since the thresholds are fixed macros. */
     245  
     246  /* Setup mpfr_exp_2 */
     247  mpfr_prec_t mpfr_exp_2_threshold;
     248  #undef  MPFR_EXP_2_THRESHOLD
     249  #define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
     250  #include "exp_2.c"
     251  static double
     252  speed_mpfr_exp_2 (struct speed_params *s)
     253  {
     254    SPEED_MPFR_FUNC (mpfr_exp_2);
     255  }
     256  
     257  /* Setup mpfr_exp */
     258  mpfr_prec_t mpfr_exp_threshold;
     259  #undef  MPFR_EXP_THRESHOLD
     260  #define MPFR_EXP_THRESHOLD mpfr_exp_threshold
     261  #include "exp.c"
     262  static double
     263  speed_mpfr_exp (struct speed_params *s)
     264  {
     265    SPEED_MPFR_FUNC (mpfr_exp);
     266  }
     267  
     268  /* Setup mpfr_sin_cos */
     269  mpfr_prec_t mpfr_sincos_threshold;
     270  #undef MPFR_SINCOS_THRESHOLD
     271  #define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold
     272  #include "sin_cos.c"
     273  #include "cos.c"
     274  static double
     275  speed_mpfr_sincos (struct speed_params *s)
     276  {
     277    SPEED_MPFR_FUNC2 (mpfr_sin_cos);
     278  }
     279  
     280  /* Setup mpfr_mul, mpfr_sqr and mpfr_div */
     281  /* Since mpfr_mul() deals with both mul and sqr, and contains an assert that
     282     the thresholds are >= 1, we initialize both values to 1 to avoid a failed
     283     assertion (let's recall that static assertions are replaced by normal
     284     dynamic assertions here). */
     285  mpfr_prec_t mpfr_mul_threshold = 1;
     286  mpfr_prec_t mpfr_sqr_threshold = 1;
     287  mpfr_prec_t mpfr_div_threshold;
     288  #undef  MPFR_MUL_THRESHOLD
     289  #define MPFR_MUL_THRESHOLD mpfr_mul_threshold
     290  #undef  MPFR_SQR_THRESHOLD
     291  #define MPFR_SQR_THRESHOLD mpfr_sqr_threshold
     292  #undef  MPFR_DIV_THRESHOLD
     293  #define MPFR_DIV_THRESHOLD mpfr_div_threshold
     294  #include "mul.c"
     295  #include "div.c"
     296  static double
     297  speed_mpfr_mul (struct speed_params *s)
     298  {
     299    SPEED_MPFR_OP (mpfr_mul);
     300  }
     301  static double
     302  speed_mpfr_sqr (struct speed_params *s)
     303  {
     304    SPEED_MPFR_SQR (mpfr_mul);
     305  }
     306  static double
     307  speed_mpfr_div (struct speed_params *s)
     308  {
     309    SPEED_MPFR_OP (mpfr_div);
     310  }
     311  
     312  /************************************************
     313   * Common functions (inspired by GMP function)  *
     314   ************************************************/
     315  static int
     316  analyze_data (double *dat, int ndat)
     317  {
     318    double  x, min_x;
     319    int     j, min_j;
     320  
     321    x = 0.0;
     322    for (j = 0; j < ndat; j++)
     323      if (dat[j] > 0.0)
     324        x += dat[j];
     325  
     326    min_x = x;
     327    min_j = 0;
     328  
     329    for (j = 0; j < ndat; x -= dat[j], j++)
     330      {
     331        if (x < min_x)
     332          {
     333            min_x = x;
     334            min_j = j;
     335          }
     336      }
     337    return min_j;
     338  }
     339  
     340  static double
     341  mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m)
     342  {
     343    double t = -1.0;
     344    int i;
     345    int number_of_iterations = 30;
     346    for (i = 1; i <= number_of_iterations && t == -1.0; i++)
     347      {
     348        t = speed_measure (fun, s);
     349        if ( (t == -1.0) && (i+1 <= number_of_iterations) )
     350          printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n",
     351                 s->size, i+1, number_of_iterations);
     352      }
     353    if (t == -1.0)
     354      {
     355        fprintf (stderr, "Failed to measure %s!\n", m);
     356        fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n");
     357        fprintf (stderr, "   under Linux: cpufreq-selector -g performance\n");
     358        fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n");
     359        abort ();
     360      }
     361    return t;
     362  }
     363  
     364  #define THRESHOLD_WINDOW 16
     365  #define THRESHOLD_FINAL_WINDOW 128
     366  static double
     367  domeasure (mpfr_prec_t *threshold,
     368             double (*func) (struct speed_params *),
     369             mpfr_prec_t p)
     370  {
     371    struct speed_params s;
     372    mp_size_t size;
     373    double t1, t2, d;
     374  
     375    s.align_xp = s.align_yp = s.align_wp = 64;
     376    s.size = p;
     377    size = (p - 1)/GMP_NUMB_BITS+1;
     378    s.xp = malloc (2*size*sizeof (mp_limb_t));
     379    if (s.xp == NULL)
     380      {
     381        fprintf (stderr, "Can't allocate memory.\n");
     382        abort ();
     383      }
     384    mpn_random (s.xp, size);
     385    s.yp = s.xp + size;
     386    mpn_random (s.yp, size);
     387    *threshold = MPFR_PREC_MAX;
     388    t1 = mpfr_speed_measure (func, &s, "function 1");
     389    *threshold = 1;
     390    t2 = mpfr_speed_measure (func, &s, "function 2");
     391    free (s.xp);
     392    /* t1 is the time of the first algo (used for low prec) */
     393    if (t2 >= t1)
     394      d = (t2 - t1) / t2;
     395    else
     396      d = (t2 - t1) / t1;
     397    /* d > 0 if we have to use algo 1.
     398       d < 0 if we have to use algo 2 */
     399    return d;
     400  }
     401  
     402  /* Performs measures when both the precision and the point of evaluation
     403     shall vary. s.yp is ignored and not initialized.
     404     It assumes that func depends on three thresholds with a boundary of the
     405     form threshold1*x + threshold2*p = some scaling factor, if x<0,
     406     and  threshold3*x + threshold2*p = some scaling factor, if x>=0.
     407  */
     408  static double
     409  domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3,
     410              double (*func) (struct speed_params *),
     411              mpfr_prec_t p,
     412              mpfr_t x)
     413  {
     414    struct speed_params s;
     415    mp_size_t size;
     416    double t1, t2, d;
     417    mpfr_t xtmp;
     418  
     419    if (MPFR_IS_SINGULAR (x))
     420      {
     421        mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n");
     422        abort ();
     423      }
     424    if (MPFR_IS_NEG (x))
     425      s.align_xp = 2;
     426    else
     427      s.align_xp = 1;
     428  
     429    s.align_yp = s.align_wp = 64;
     430    s.size = p;
     431    size = (p - 1)/GMP_NUMB_BITS+1;
     432  
     433    mpfr_init2 (xtmp, p);
     434    mpn_random (xtmp->_mpfr_d, size);
     435    xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT;
     436    MPFR_SET_EXP (xtmp, -53);
     437    mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN);
     438    mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb)       */
     439                                        /* where perturb ~ 2^(-53) is */
     440                                        /* randomly chosen.           */
     441    s.xp = xtmp->_mpfr_d;
     442    s.r = MPFR_GET_EXP (xtmp);
     443  
     444    *threshold1 = 0;
     445    *threshold2 = 0;
     446    *threshold3 = 0;
     447    t1 = mpfr_speed_measure (func, &s, "function 1");
     448  
     449    if (MPFR_IS_NEG (x))
     450      *threshold1 = INT_MIN;
     451    else
     452      *threshold3 = INT_MAX;
     453    *threshold2 = INT_MAX;
     454    t2 = mpfr_speed_measure (func, &s, "function 2");
     455  
     456    /* t1 is the time of the first algo (used for low prec) */
     457    if (t2 >= t1)
     458      d = (t2 - t1) / t2;
     459    else
     460      d = (t2 - t1) / t1;
     461    /* d > 0 if we have to use algo 1.
     462       d < 0 if we have to use algo 2 */
     463    mpfr_clear (xtmp);
     464    return d;
     465  }
     466  
     467  /* Tune a function with a simple THRESHOLD
     468     The function doesn't depend on another threshold.
     469     It assumes that it uses algo1 if p < THRESHOLD
     470     and algo2 otherwise.
     471     if algo2 is better for low prec, and algo1 better for high prec,
     472     the behaviour of this function is undefined. */
     473  static void
     474  tune_simple_func (mpfr_prec_t *threshold,
     475                    double (*func) (struct speed_params *),
     476                    mpfr_prec_t pstart)
     477  {
     478    double measure[THRESHOLD_FINAL_WINDOW+1];
     479    double d = -1.0;
     480    mpfr_prec_t pstep;
     481    int i, numpos, numneg, try;
     482    mpfr_prec_t pmin, pmax, p;
     483  
     484    /* first look for a lower bound within 10% */
     485    pmin = p = pstart;
     486    for (i = 0; i < 10 && d < 0.0; i++)
     487      d = domeasure (threshold, func, pmin);
     488    if (d < 0.0)
     489      {
     490        if (verbose)
     491          printf ("Oops: even for precision %lu, algo 2 seems to be faster!\n",
     492                  (unsigned long) pmin);
     493        *threshold = pmin;
     494        return;
     495      }
     496    if (d >= 1.00)
     497      for (;;)
     498        {
     499          d = domeasure (threshold, func, pmin);
     500          if (d < 1.00)
     501            break;
     502          p = pmin;
     503          pmin += pmin/2;
     504        }
     505    pmin = p;
     506    for (;;)
     507      {
     508        d = domeasure (threshold, func, pmin);
     509        if (d < 0.10)
     510          break;
     511        pmin += GMP_NUMB_BITS;
     512      }
     513  
     514    /* then look for an upper bound within 20% */
     515    pmax = pmin * 2;
     516    for (;;)
     517      {
     518        d = domeasure (threshold, func, pmax);
     519        if (d < -0.20)
     520          break;
     521        pmax += pmin / 2; /* don't increase too rapidly */
     522      }
     523  
     524    /* The threshold is between pmin and pmax. Affine them */
     525    try = 0;
     526    while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
     527      {
     528        pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
     529        if (verbose)
     530          printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
     531        p = (pmin + pmax) / 2;
     532        for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
     533          {
     534            measure[i] = domeasure (threshold, func,
     535                                    p+(i-THRESHOLD_WINDOW/2)*pstep);
     536            if (measure[i] > 0)
     537              numpos ++;
     538            else if (measure[i] < 0)
     539              numneg ++;
     540          }
     541        if (numpos > numneg)
     542          /* We use more often algo 1 than algo 2 */
     543          pmin = p - THRESHOLD_WINDOW/2*pstep;
     544        else if (numpos < numneg)
     545          pmax = p + THRESHOLD_WINDOW/2*pstep;
     546        else
     547          /* numpos == numneg ... */
     548          if (++ try > 2)
     549            {
     550              *threshold = p;
     551              if (verbose)
     552                printf ("Quick find: %lu\n", *threshold);
     553              return ;
     554            }
     555      }
     556  
     557    /* Final tune... */
     558    if (verbose)
     559      printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
     560    for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
     561      measure[i] = domeasure (threshold, func, pmin+i);
     562    i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
     563    *threshold = pmin + i;
     564    if (verbose)
     565      printf ("%lu\n", *threshold);
     566    return;
     567  }
     568  
     569  /* Tune a function which behavior depends on both p and x,
     570     in a given direction.
     571     It assumes that for (x,p) close to zero, algo1 is used
     572     and algo2 is used when (x,p) is far from zero.
     573     If algo2 is better for low prec, and algo1 better for high prec,
     574     the behavior of this function is undefined.
     575     This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp)
     576     until it finds a point on the boundary. It returns ell.
     577   */
     578  static void
     579  tune_simple_func_in_some_direction (long int *threshold1,
     580                                      long int *threshold2,
     581                                      long int *threshold3,
     582                                      double (*func) (struct speed_params *),
     583                                      mpfr_prec_t pstart,
     584                                      int dirx, int dirp,
     585                                      mpfr_t xres, mpfr_prec_t *pres)
     586  {
     587    double measure[THRESHOLD_FINAL_WINDOW+1];
     588    double d;
     589    mpfr_prec_t pstep;
     590    int i, numpos, numneg, try;
     591    mpfr_prec_t pmin, pmax, p;
     592    mpfr_t xmin, xmax, x;
     593    mpfr_t ratio;
     594  
     595    mpfr_init2 (ratio, MPFR_SMALL_PRECISION);
     596    mpfr_set_si (ratio, dirx, MPFR_RNDN);
     597    mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN);
     598  
     599    mpfr_init2 (xmin, MPFR_SMALL_PRECISION);
     600    mpfr_init2 (xmax, MPFR_SMALL_PRECISION);
     601    mpfr_init2 (x, MPFR_SMALL_PRECISION);
     602  
     603    /* first look for a lower bound within 10% */
     604    pmin = p = pstart;
     605    mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
     606    mpfr_set (x, xmin, MPFR_RNDN);
     607  
     608    d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
     609    if (d < 0.0)
     610      {
     611        if (verbose)
     612          printf ("Oops: even for %lu, algo 2 seems to be faster!\n",
     613                  (unsigned long) pmin);
     614        *pres = MPFR_PREC_MIN;
     615        mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
     616        mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
     617        return;
     618      }
     619    if (d >= 1.00)
     620      for (;;)
     621        {
     622          d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
     623          if (d < 1.00)
     624            break;
     625          p = pmin;
     626          mpfr_set (x, xmin, MPFR_RNDN);
     627          pmin += pmin/2;
     628          mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
     629        }
     630    pmin = p;
     631    mpfr_set (xmin, x, MPFR_RNDN);
     632    for (;;)
     633      {
     634        d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin);
     635        if (d < 0.10)
     636          break;
     637        pmin += GMP_NUMB_BITS;
     638        mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
     639      }
     640  
     641    /* then look for an upper bound within 20% */
     642    pmax = pmin * 2;
     643    mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
     644    for (;;)
     645      {
     646        d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax);
     647        if (d < -0.20)
     648          break;
     649        pmax += pmin / 2; /* don't increase too rapidly */
     650        mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
     651      }
     652  
     653    /* The threshold is between pmin and pmax. Affine them */
     654    try = 0;
     655    while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW)
     656      {
     657        pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1);
     658        if (verbose)
     659          printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep);
     660        p = (pmin + pmax) / 2;
     661        mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN);
     662        for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++)
     663          {
     664            *pres = p+(i-THRESHOLD_WINDOW/2)*pstep;
     665            mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
     666            measure[i] = domeasure2 (threshold1, threshold2, threshold3,
     667                                     func, *pres, xres);
     668            if (measure[i] > 0)
     669              numpos ++;
     670            else if (measure[i] < 0)
     671              numneg ++;
     672          }
     673        if (numpos > numneg)
     674          {
     675            /* We use more often algo 1 than algo 2 */
     676            pmin = p - THRESHOLD_WINDOW/2*pstep;
     677            mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN);
     678          }
     679        else if (numpos < numneg)
     680          {
     681            pmax = p + THRESHOLD_WINDOW/2*pstep;
     682            mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN);
     683          }
     684        else
     685          /* numpos == numneg ... */
     686          if (++ try > 2)
     687            {
     688              *pres = p;
     689              mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
     690              if (verbose)
     691                printf ("Quick find: %lu\n", *pres);
     692              mpfr_clear (ratio);
     693              mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
     694              return ;
     695            }
     696      }
     697  
     698    /* Final tune... */
     699    if (verbose)
     700      printf ("Finalizing in [%lu, %lu]... ", pmin, pmax);
     701    for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++)
     702      {
     703        *pres = pmin+i;
     704        mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
     705        measure[i] = domeasure2 (threshold1, threshold2, threshold3,
     706                                 func, *pres, xres);
     707      }
     708    i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1);
     709    *pres = pmin + i;
     710    mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN);
     711    if (verbose)
     712      printf ("%lu\n", *pres);
     713    mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax);
     714    return;
     715  }
     716  
     717  /************************************
     718   * Tune Mulders' mulhigh function   *
     719   ************************************/
     720  #define TOLERANCE 1.00
     721  #define MULDERS_TABLE_SIZE 1024
     722  #ifndef MPFR_MULHIGH_SIZE
     723  # define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE
     724  #endif
     725  #ifndef MPFR_SQRHIGH_SIZE
     726  # define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE
     727  #endif
     728  #ifndef MPFR_DIVHIGH_SIZE
     729  # define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE
     730  #endif
     731  #define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE
     732  #define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE
     733  #define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE
     734  #include "mulders.c"
     735  
     736  static double
     737  speed_mpfr_mulhigh (struct speed_params *s)
     738  {
     739    SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n);
     740  }
     741  
     742  static double
     743  speed_mpfr_sqrhigh (struct speed_params *s)
     744  {
     745    SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n);
     746  }
     747  
     748  static double
     749  speed_mpfr_divhigh (struct speed_params *s)
     750  {
     751    SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size));
     752  }
     753  
     754  #define MAX_STEPS 513 /* maximum number of values of k tried for a given n */
     755  
     756  /* Tune mpfr_mulhigh_n for size n */
     757  static mp_size_t
     758  tune_mul_mulders_upto (mp_size_t n)
     759  {
     760    struct speed_params s;
     761    mp_size_t k, kbest, step;
     762    double t, tbest;
     763    MPFR_TMP_DECL (marker);
     764  
     765    if (n == 0)
     766      return -1;
     767  
     768    MPFR_TMP_MARK (marker);
     769    s.align_xp = s.align_yp = s.align_wp = 64;
     770    s.size = n;
     771    s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
     772    s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
     773    mpn_random (s.xp, n);
     774    mpn_random (s.yp, n);
     775  
     776    /* Check k == -1, mpn_mul_basecase */
     777    mulhigh_ktab[n] = -1;
     778    kbest = -1;
     779    tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
     780  
     781    /* Check k == 0, mpn_mulhigh_n_basecase */
     782    mulhigh_ktab[n] = 0;
     783    t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
     784    if (t * TOLERANCE < tbest)
     785      kbest = 0, tbest = t;
     786  
     787    /* Check Mulders with cutoff point k */
     788    step = 1 + n / (2 * MAX_STEPS);
     789    /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
     790    for (k = (n + 4) / 2 ; k < n ; k += step)
     791      {
     792        mulhigh_ktab[n] = k;
     793        t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh");
     794        if (t * TOLERANCE < tbest)
     795          kbest = k, tbest = t;
     796      }
     797  
     798    mulhigh_ktab[n] = kbest;
     799  
     800    MPFR_TMP_FREE (marker);
     801    return kbest;
     802  }
     803  
     804  /* Tune mpfr_sqrhigh_n for size n */
     805  static mp_size_t
     806  tune_sqr_mulders_upto (mp_size_t n)
     807  {
     808    struct speed_params s;
     809    mp_size_t k, kbest, step;
     810    double t, tbest;
     811    MPFR_TMP_DECL (marker);
     812  
     813    if (n == 0)
     814      return -1;
     815  
     816    MPFR_TMP_MARK (marker);
     817    s.align_xp = s.align_wp = 64;
     818    s.size = n;
     819    s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
     820    mpn_random (s.xp, n);
     821  
     822    /* Check k == -1, mpn_sqr_basecase */
     823    sqrhigh_ktab[n] = -1;
     824    kbest = -1;
     825    tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
     826  
     827    /* Check k == 0, mpfr_mulhigh_n_basecase */
     828    sqrhigh_ktab[n] = 0;
     829    t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
     830    if (t * TOLERANCE < tbest)
     831      kbest = 0, tbest = t;
     832  
     833    /* Check Mulders */
     834    step = 1 + n / (2 * MAX_STEPS);
     835    /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */
     836    for (k = (n + 4) / 2 ; k < n ; k += step)
     837      {
     838        sqrhigh_ktab[n] = k;
     839        t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh");
     840        if (t * TOLERANCE < tbest)
     841          kbest = k, tbest = t;
     842      }
     843  
     844    sqrhigh_ktab[n] = kbest;
     845  
     846    MPFR_TMP_FREE (marker);
     847    return kbest;
     848  }
     849  
     850  /* Tune mpfr_divhigh_n for size n.
     851     Ensure divhigh_ktab[n] < n for n > 0. */
     852  static mp_size_t
     853  tune_div_mulders_upto (mp_size_t n)
     854  {
     855    struct speed_params s;
     856    mp_size_t k, kbest, step;
     857    double t, tbest;
     858    MPFR_TMP_DECL (marker);
     859  
     860    /* we require n > 2 in mpfr_divhigh */
     861    if (n <= 2)
     862      {
     863        divhigh_ktab[n] = 0;
     864        return 0;
     865      }
     866  
     867    MPFR_TMP_MARK (marker);
     868    s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64;
     869    s.size = n;
     870    s.xp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
     871    s.yp   = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t));
     872    mpn_random (s.xp, n);
     873    mpn_random (s.yp, n);
     874  
     875    /* Check k == 0, i.e., mpfr_divhigh_n_basecase */
     876    kbest = 0;
     877    tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
     878  
     879    /* Check Mulders */
     880    step = 1 + n / (2 * MAX_STEPS);
     881    /* we should have (n+3)/2 <= k < n-1, which translates into
     882       (n+4)/2 <= k < n-1 in C */
     883    for (k = (n + 4) / 2 ; k < n - 1; k += step)
     884      {
     885        divhigh_ktab[n] = k;
     886        t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh");
     887        if (t * TOLERANCE < tbest)
     888          kbest = k, tbest = t;
     889      }
     890  
     891    MPFR_ASSERTN(kbest < n);
     892    divhigh_ktab[n] = kbest;
     893  
     894    MPFR_TMP_FREE (marker);
     895  
     896    return kbest;
     897  }
     898  
     899  static void
     900  tune_mul_mulders (FILE *f)
     901  {
     902    mp_size_t k;
     903  
     904    if (verbose)
     905      printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE);
     906    fprintf (f, "#define MPFR_MULHIGH_TAB  \\\n ");
     907    for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++)
     908      {
     909        fprintf (f, "%d", (int) tune_mul_mulders_upto (k));
     910        if (k != MPFR_MULHIGH_TAB_SIZE-1)
     911          fputc (',', f);
     912        if ((k+1) % 16 == 0)
     913          fprintf (f, " \\\n ");
     914        if (verbose)
     915          putchar ('.');
     916      }
     917    fprintf (f, " \n");
     918    if (verbose)
     919      putchar ('\n');
     920  }
     921  
     922  static void
     923  tune_sqr_mulders (FILE *f)
     924  {
     925    mp_size_t k;
     926  
     927    if (verbose)
     928      printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE);
     929    fprintf (f, "#define MPFR_SQRHIGH_TAB  \\\n ");
     930    for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++)
     931      {
     932        fprintf (f, "%d", (int) tune_sqr_mulders_upto (k));
     933        if (k != MPFR_SQRHIGH_TAB_SIZE-1)
     934          fputc (',', f);
     935        if ((k+1) % 16 == 0)
     936          fprintf (f, " \\\n ");
     937        if (verbose)
     938          putchar ('.');
     939      }
     940    fprintf (f, " \n");
     941    if (verbose)
     942      putchar ('\n');
     943  }
     944  
     945  static void
     946  tune_div_mulders (FILE *f)
     947  {
     948    mp_size_t k;
     949  
     950    if (verbose)
     951      printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE);
     952    fprintf (f, "#define MPFR_DIVHIGH_TAB  \\\n ");
     953    for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++)
     954      {
     955        fprintf (f, "%d", (int) tune_div_mulders_upto (k));
     956        if (k != MPFR_DIVHIGH_TAB_SIZE - 1)
     957          fputc (',', f);
     958        if ((k+1) % 16 == 0)
     959          fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k);
     960        if (verbose)
     961          putchar ('.');
     962      }
     963    fprintf (f, " \n");
     964    if (verbose)
     965      putchar ('\n');
     966  }
     967  
     968  /*******************************************************
     969   *            Tuning functions for mpfr_ai             *
     970   *******************************************************/
     971  
     972  long int mpfr_ai_threshold1;
     973  long int mpfr_ai_threshold2;
     974  long int mpfr_ai_threshold3;
     975  #undef  MPFR_AI_THRESHOLD1
     976  #define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1
     977  #undef  MPFR_AI_THRESHOLD2
     978  #define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2
     979  #undef  MPFR_AI_THRESHOLD3
     980  #define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3
     981  
     982  #include "ai.c"
     983  
     984  static double
     985  speed_mpfr_ai (struct speed_params *s)
     986  {
     987    SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai);
     988  }
     989  
     990  
     991  /*******************************************************
     992   *            Tune all the threshold of MPFR           *
     993   * Warning: tune the function in their dependent order!*
     994   *******************************************************/
     995  static void
     996  all (const char *filename)
     997  {
     998    FILE *f;
     999    time_t  start_time, end_time;
    1000    struct tm  *tp;
    1001    mpfr_t x1, x2, x3, tmp1, tmp2;
    1002    mpfr_prec_t p1, p2, p3;
    1003  
    1004    f = fopen (filename, "w");
    1005    if (f == NULL)
    1006      {
    1007        fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
    1008        abort ();
    1009      }
    1010  
    1011    speed_time_init ();
    1012    if (verbose)
    1013      {
    1014        printf ("Using: %s\n", speed_time_string);
    1015        printf ("speed_precision %d", speed_precision);
    1016        if (speed_unittime == 1.0)
    1017          printf (", speed_unittime 1 cycle");
    1018        else
    1019          printf (", speed_unittime %.2e secs", speed_unittime);
    1020        if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
    1021          printf (", CPU freq unknown\n");
    1022        else
    1023          printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime);
    1024      }
    1025  
    1026    time (&start_time);
    1027    tp = localtime (&start_time);
    1028    fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ",
    1029            tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
    1030  
    1031  #ifdef __ICC
    1032    fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10);
    1033  #elif defined(__GNUC__)
    1034  #ifdef __GNUC_PATCHLEVEL__
    1035    fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__,
    1036             __GNUC_PATCHLEVEL__);
    1037  #else
    1038    fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
    1039  #endif
    1040  #elif defined (__SUNPRO_C)
    1041    fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
    1042  #elif defined (__sgi) && defined (_COMPILER_VERSION)
    1043    fprintf (f, "MIPSpro C %d.%d.%d */\n",
    1044             _COMPILER_VERSION / 100,
    1045             _COMPILER_VERSION / 10 % 10,
    1046             _COMPILER_VERSION % 10);
    1047  #elif defined (__DECC) && defined (__DECC_VER)
    1048    fprintf (f, "DEC C %d */\n", __DECC_VER);
    1049  #else
    1050    fprintf (f, "system compiler */\n");
    1051  #endif
    1052    fprintf (f, "\n\n");
    1053    fprintf (f, "#ifndef MPFR_TUNE_CASE\n");
    1054    fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n");
    1055    fprintf (f, "#endif\n\n");
    1056  
    1057    /* Tune mulhigh */
    1058    tune_mul_mulders (f);
    1059  
    1060    /* Tune sqrhigh */
    1061    tune_sqr_mulders (f);
    1062  
    1063    /* Tune divhigh */
    1064    tune_div_mulders (f);
    1065    fflush (f);
    1066  
    1067    /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */
    1068    if (verbose)
    1069      printf ("Tuning mpfr_mul...\n");
    1070    tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul,
    1071                      2*GMP_NUMB_BITS+1);
    1072    fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n",
    1073             (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1);
    1074  
    1075    /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */
    1076    if (verbose)
    1077      printf ("Tuning mpfr_sqr...\n");
    1078    tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr,
    1079                      2*GMP_NUMB_BITS+1);
    1080    fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n",
    1081             (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1);
    1082  
    1083    /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */
    1084    if (verbose)
    1085      printf ("Tuning mpfr_div...\n");
    1086    tune_simple_func (&mpfr_div_threshold, speed_mpfr_div,
    1087                      2*GMP_NUMB_BITS+1);
    1088    fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n",
    1089             (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1);
    1090  
    1091    /* Tune mpfr_exp_2 */
    1092    if (verbose)
    1093      printf ("Tuning mpfr_exp_2...\n");
    1094    tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS);
    1095    fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n",
    1096             (unsigned long) mpfr_exp_2_threshold);
    1097  
    1098    /* Tune mpfr_exp */
    1099    if (verbose)
    1100      printf ("Tuning mpfr_exp...\n");
    1101    tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp,
    1102                      MPFR_PREC_MIN+3*GMP_NUMB_BITS);
    1103    fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n",
    1104             (unsigned long) mpfr_exp_threshold);
    1105  
    1106    /* Tune mpfr_sin_cos */
    1107    if (verbose)
    1108      printf ("Tuning mpfr_sin_cos...\n");
    1109    tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos,
    1110                      MPFR_PREC_MIN+3*GMP_NUMB_BITS);
    1111    fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n",
    1112             (unsigned long) mpfr_sincos_threshold);
    1113  
    1114    /* Tune mpfr_ai */
    1115    if (verbose)
    1116      printf ("Tuning mpfr_ai...\n");
    1117    mpfr_init2 (x1, MPFR_SMALL_PRECISION);
    1118    mpfr_init2 (x2, MPFR_SMALL_PRECISION);
    1119    mpfr_init2 (x3, MPFR_SMALL_PRECISION);
    1120    mpfr_init2 (tmp1, MPFR_SMALL_PRECISION);
    1121    mpfr_init2 (tmp2, MPFR_SMALL_PRECISION);
    1122  
    1123    tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
    1124                                        &mpfr_ai_threshold3, speed_mpfr_ai,
    1125                                        MPFR_PREC_MIN+GMP_NUMB_BITS,
    1126                                        -60, 200, x1, &p1);
    1127    tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
    1128                                        &mpfr_ai_threshold3, speed_mpfr_ai,
    1129                                        MPFR_PREC_MIN+GMP_NUMB_BITS,
    1130                                        -20, 500, x2, &p2);
    1131    tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2,
    1132                                        &mpfr_ai_threshold3, speed_mpfr_ai,
    1133                                        MPFR_PREC_MIN+GMP_NUMB_BITS,
    1134                                        40, 200, x3, &p3);
    1135  
    1136    mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN);
    1137    mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN);
    1138    mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN);
    1139    mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN);
    1140  
    1141    mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN);
    1142    mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN);
    1143    mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
    1144    mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN);
    1145  
    1146    mpfr_sub (tmp2, x2, x1, MPFR_RNDN);
    1147    mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN);
    1148    mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN);
    1149  
    1150    mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN);
    1151    mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN);
    1152    mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN);
    1153    mpfr_div (tmp1, tmp1, x3, MPFR_RNDN);
    1154    mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN);
    1155  
    1156    fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1);
    1157    fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2);
    1158    fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3);
    1159  
    1160    mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3);
    1161    mpfr_clear (tmp1); mpfr_clear (tmp2);
    1162  
    1163    /* End of tuning */
    1164    time (&end_time);
    1165    fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n",
    1166             (long) (end_time - start_time));
    1167    if (verbose)
    1168      printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time));
    1169  
    1170    fclose (f);
    1171  }
    1172  
    1173  
    1174  /* Main function */
    1175  int main (int argc, char *argv[])
    1176  {
    1177    /* Unbuffered so if output is redirected to a file it isn't lost if the
    1178       program is killed part way through.  */
    1179    setbuf (stdout, NULL);
    1180    setbuf (stderr, NULL);
    1181  
    1182    verbose = argc > 1;
    1183  
    1184    if (verbose)
    1185      printf ("Tuning MPFR (Coffee time?)...\n");
    1186  
    1187    all ("mparam.h");
    1188  
    1189    return 0;
    1190  }