1  /* Copyright (C) 2008-2023 Free Software Foundation, Inc.
       2     Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
       3  		on behalf of Synopsys Inc.
       4  
       5  This file is part of GCC.
       6  
       7  GCC is free software; you can redistribute it and/or modify it under
       8  the terms of the GNU General Public License as published by the Free
       9  Software Foundation; either version 3, or (at your option) any later
      10  version.
      11  
      12  GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13  WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15  for more details.
      16  
      17  Under Section 7 of GPL version 3, you are granted additional
      18  permissions described in the GCC Runtime Library Exception, version
      19  3.1, as published by the Free Software Foundation.
      20  
      21  You should have received a copy of the GNU General Public License and
      22  a copy of the GCC Runtime Library Exception along with this program;
      23  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      24  <http://www.gnu.org/licenses/>.  */
      25  
/* We use a polynomial similar to a Tchebycheff polynomial to get an initial
   seed, and then use a Newton-Raphson iteration step to get an
   approximate result.
   If this result can't be rounded to the exact result with confidence, we
   round to the value between the two closest representable values, and
   test if the correctly rounded value is above or below this value.
      32   
   Because of the Newton-Raphson iteration step, an error in the seed at X
   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
   or a polynomial that is close to optimal according to the maximum norm
   on the error of the seed value; we want one that is close to optimal
   according to the maximum norm on the error of the result, i.e. we
   want the maxima of the polynomial to increase linearly.
      39     Given an interval [X0,X2) over which to approximate,
      40     with X1 := (X0+X2)/2,  D := X1-X0, F := 1/D, and S := D/X1 we have,
      41     like for Tchebycheff polynoms:
      42     P(0) := 1
      43     but then we have:
      44     P(1) := X + S*D
      45     P(2) := 2 * X^2 + S*D * X - D^2
      46     Then again:
      47     P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
      48   */

#include <math.h>
#include <stdio.h>
#include <string.h>

      50  static long double merr = 42.;
      51  
      52  double
      53  err (long double a0, long double a1, long double x)
      54  {
      55    long double y0 = a0 + (x-1)*a1;
      56  
      57    long double approx = 2. * y0 - y0 * x * y0;
      58    long double true = 1./x;
      59    long double err = approx - true;
      60  
      61    if (err <= -1./65536./16384.)
      62      printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
      63  	    (double)x, (double)approx, (double)true);
      64    if (merr > err)
      65      merr = err;
      66    return err;
      67  }
      68  
      69  int
      70  main (void)
      71  {
      72    long double T[5]; /* Taylor polynom */
      73    long double P[5][5];
      74    int i, j;
      75    long double X0, X1, X2, S;
      76    long double inc = 1./64;
      77    long double D = inc*0.5;
      78    long i0, i1, i2, io;
      79  
      80    memset (P, 0, sizeof (P));
      81    P[0][0] = 1.;
      82    for (i = 1; i < 5; i++)
      83      P[i][i] = 1 << i-1;
      84    P[2][0] = -D*D;
      85    for (X0 = 1.; X0 < 2.; X0 += inc)
      86      {
      87        X1 = X0 + inc * 0.5;
      88        X2 = X0 + inc;
      89        S = D / X1;
      90        T[0] = 1./X1;
      91        for (i = 1; i < 5; i++)
      92  	T[i] = T[i-1] * -T[0];
      93  #if 0
      94        printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
      95  (double)T[3], (double)T[4]);
      96  #endif
      97        P[1][0] = S*D;
      98        P[2][1] = S*D;
      99        for (i = 3; i < 5; i++)
     100  	{
     101  	  P[i][0] = -D*D*P[i-2][0];
     102  	  for (j = 1; j < i; j++)
     103  	    P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
     104  	}
     105  #if 0
     106        printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
     107  (double)P[3][3], (double)P[3][4]);
     108        printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
     109  (double)P[4][3], (double)P[4][4]);
     110  #endif
     111        for (i = 4; i > 1; i--)
     112  	{
     113  	  long double a = T[i]/P[i][i];
     114  
     115  	  for (j = 0; j < i; j++)
     116  	    T[j] -= a * P[i][j];
     117  	}
     118  #if 0
     119        printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
     120  #endif
     121  #if 0
     122        i2 = T[2]*1024;
     123        long double a = (T[2]-i/1024.)/P[2][2];
     124        for (j = 0; j < 2; j++)
     125  	T[j] -= a * P[2][j];
     126  #else
     127        i2 = 0;
     128  #endif
     129  	  long double T0, Ti1;
     130        for (i = 0, i0 = 0; i < 4; i++)
     131  	{
     132  
     133  	  i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
     134  	  i1 = - (-i1 & 0x0fff);
     135  	  Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL);
     136  	  T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
     137  	  i0 = T0 * 1024 * 1024 + 0.5;
     138  	  i0 &= 0xfffff;
     139  	}
     140  #if 0
     141        printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
     142  #endif
     143        io = (unsigned)(-i1 << 20) | i0;
     144        long double A1 = (unsigned)io/-65536./65536.;
     145        long double A0 =  (unsigned)(io << 12)/65536./65536.;
     146        long double Xm0 = 1./sqrt (-A1);
     147        long double Xm1 = 0.5+0.5*-A0/A1;
     148  #if 0
     149        printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
     150        printf ("%.12f %.12f %.12f\n",
     151  	      err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
     152        printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
     153        printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
     154  #endif
     155        printf ("\t.long 0x%x\n", io);
     156     }
     157  #if 0
     158    printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
     159  #endif
     160    return 0;
     161  }