(root)/
glibc-2.38/
sysdeps/
ieee754/
flt-32/
e_exp10f.c
       1  /* Single-precision 10^x function.
       2     Copyright (C) 2020-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <math.h>
      20  #include <math-narrow-eval.h>
      21  #include <stdint.h>
      22  #include <libm-alias-finite.h>
      23  #include <libm-alias-float.h>
      24  #include <shlib-compat.h>
      25  #include <math-svid-compat.h>
      26  #include "math_config.h"
      27  
      28  /*
      29    EXP2F_TABLE_BITS 5
      30    EXP2F_POLY_ORDER 3
      31  
      32    Max. ULP error: 0.502 (normal range, nearest rounding).
      33    Max. relative error: 2^-33.240 (before rounding to float).
      34    Wrong count: 169839.
      35    Non-nearest ULP error: 1 (rounded ULP error).
      36  
      37    Detailed error analysis (for EXP2F_TABLE_BITS=3 thus N=32):
      38  
      39    - We first compute z = RN(InvLn10N * x) where
      40  
      41        InvLn10N = N*log(10)/log(2) * (1 + theta1) with |theta1| < 2^-54.150
      42  
      43      since z is rounded to nearest:
      44  
      45        z = InvLn10N * x * (1 + theta2) with |theta2| < 2^-53
      46  
      47      thus z =  N*log(10)/log(2) * x * (1 + theta3) with |theta3| < 2^-52.463
      48  
      49    - Since |x| < 38 in the main branch, we deduce:
      50  
      51      z = N*log(10)/log(2) * x + theta4 with |theta4| < 2^-40.483
      52  
      53    - We then write z = k + r where k is an integer and |r| <= 0.5 (exact)
      54  
      55    - We thus have
      56  
      57      x = z*log(2)/(N*log(10)) - theta4*log(2)/(N*log(10))
      58        = z*log(2)/(N*log(10)) + theta5 with |theta5| < 2^-47.215
      59  
      60      10^x = 2^(k/N) * 2^(r/N) * 10^theta5
      61           =  2^(k/N) * 2^(r/N) * (1 + theta6) with |theta6| < 2^-46.011
      62  
      63    - Then 2^(k/N) is approximated by table lookup, the maximal relative error
      64      is for (k%N) = 5, with
      65  
      66        s = 2^(i/N) * (1 + theta7) with |theta7| < 2^-53.249
      67  
      68    - 2^(r/N) is approximated by a degree-3 polynomial, where the maximal
      69      mathematical relative error is 2^-33.243.
      70  
      71    - For C[0] * r + C[1], assuming no FMA is used, since |r| <= 0.5 and
      72      |C[0]| < 1.694e-6, |C[0] * r| < 8.47e-7, and the absolute error on
      73      C[0] * r is bounded by 1/2*ulp(8.47e-7) = 2^-74.  Then we add C[1] with
      74      |C[1]| < 0.000235, thus the absolute error on C[0] * r + C[1] is bounded
      75      by 2^-65.994 (z is bounded by 0.000236).
      76  
      77    - For r2 = r * r, since |r| <= 0.5, we have |r2| <= 0.25 and the absolute
      78      error is bounded by 1/4*ulp(0.25) = 2^-56 (the factor 1/4 is because |r2|
      79      cannot exceed 1/4, and on the left of 1/4 the distance between two
      80      consecutive numbers is 1/4*ulp(1/4)).
      81  
      82    - For y = C[2] * r + 1, assuming no FMA is used, since |r| <= 0.5 and
      83      |C[2]| < 0.0217, the absolute error on C[2] * r is bounded by 2^-60,
      84      and thus the absolute error on C[2] * r + 1 is bounded by 1/2*ulp(1)+2^60
      85      < 2^-52.988, and |y| < 1.01085 (the error bound is better if a FMA is
      86      used).
      87  
      88    - for z * r2 + y: the absolute error on z is bounded by 2^-65.994, with
      89      |z| < 0.000236, and the absolute error on r2 is bounded by 2^-56, with
      90      r2 < 0.25, thus |z*r2| < 0.000059, and the absolute error on z * r2
      91      (including the rounding error) is bounded by:
      92  
      93        2^-65.994 * 0.25 + 0.000236 * 2^-56 + 1/2*ulp(0.000059) < 2^-66.429.
      94  
      95      Now we add y, with |y| < 1.01085, and error on y bounded by 2^-52.988,
      96      thus the absolute error is bounded by:
      97  
      98        2^-66.429 + 2^-52.988 + 1/2*ulp(1.01085) < 2^-51.993
      99  
     100    - Now we convert the error on y into relative error.  Recall that y
     101      approximates 2^(r/N), for |r| <= 0.5 and N=32. Thus 2^(-0.5/32) <= y,
     102      and the relative error on y is bounded by
     103  
     104        2^-51.993/2^(-0.5/32) < 2^-51.977
     105  
     106    - Taking into account the mathematical relative error of 2^-33.243 we have:
     107  
     108        y = 2^(r/N) * (1 + theta8) with |theta8| < 2^-33.242
     109  
     110    - Since we had s = 2^(k/N) * (1 + theta7) with |theta7| < 2^-53.249,
     111      after y = y * s we get y = 2^(k/N) * 2^(r/N) * (1 + theta9)
     112      with |theta9| < 2^-33.241
     113  
     114    - Finally, taking into account the error theta6 due to the rounding error on
     115      InvLn10N, and when multiplying InvLn10N * x, we get:
     116  
     117        y = 10^x * (1 + theta10) with |theta10| < 2^-33.240
     118  
     119    - Converting into binary64 ulps, since y < 2^53*ulp(y), the error is at most
     120      2^19.76 ulp(y)
     121  
     122    - If the result is a binary32 value in the normal range (i.e., >= 2^-126),
     123      then the error is at most 2^-9.24 ulps, i.e., less than 0.00166 (in the
     124      subnormal range, the error in ulps might be larger).
     125  
     126    Note that this bound is tight, since for x=-0x25.54ac0p0 the final value of
     127    y (before conversion to float) differs from 879853 ulps from the correctly
     128    rounded value, and 879853 ~ 2^19.7469.  */
     129  
     130  #define N (1 << EXP2F_TABLE_BITS)
     131  
     132  #define InvLn10N (0x3.5269e12f346e2p0 * N) /* log(10)/log(2) to nearest */
     133  #define T __exp2f_data.tab
     134  #define C __exp2f_data.poly_scaled
     135  #define SHIFT __exp2f_data.shift
     136  
     137  static inline uint32_t
     138  top13 (float x)
     139  {
     140    return asuint (x) >> 19;
     141  }
     142  
     143  float
     144  __exp10f (float x)
     145  {
     146    uint32_t abstop;
     147    uint64_t ki, t;
     148    double kd, xd, z, r, r2, y, s;
     149  
     150    xd = (double) x;
     151    abstop = top13 (x) & 0xfff; /* Ignore sign.  */
     152    if (__glibc_unlikely (abstop >= top13 (38.0f)))
     153      {
     154        /* |x| >= 38 or x is nan.  */
     155        if (asuint (x) == asuint (-INFINITY))
     156          return 0.0f;
     157        if (abstop >= top13 (INFINITY))
     158          return x + x;
     159        /* 0x26.8826ap0 is the largest value such that 10^x < 2^128.  */
     160        if (x > 0x26.8826ap0f)
     161          return __math_oflowf (0);
     162        /* -0x2d.278d4p0 is the smallest value such that 10^x > 2^-150.  */
     163        if (x < -0x2d.278d4p0f)
     164          return __math_uflowf (0);
     165  #if WANT_ERRNO_UFLOW
     166        if (x < -0x2c.da7cfp0)
     167          return __math_may_uflowf (0);
     168  #endif
     169        /* the smallest value such that 10^x >= 2^-126 (normal range)
     170           is x = -0x25.ee060p0 */
     171        /* we go through here for 2014929 values out of 2060451840
     172           (not counting NaN and infinities, i.e., about 0.1% */
     173      }
     174  
     175    /* x*N*Ln10/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
     176    z = InvLn10N * xd;
     177    /* |xd| < 38 thus |z| < 1216 */
     178  #if TOINT_INTRINSICS
     179    kd = roundtoint (z);
     180    ki = converttoint (z);
     181  #else
     182  # define SHIFT __exp2f_data.shift
     183    kd = math_narrow_eval ((double) (z + SHIFT)); /* Needs to be double.  */
     184    ki = asuint64 (kd);
     185    kd -= SHIFT;
     186  #endif
     187    r = z - kd;
     188  
     189    /* 10^x = 10^(k/N) * 10^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)  */
     190    t = T[ki % N];
     191    t += ki << (52 - EXP2F_TABLE_BITS);
     192    s = asdouble (t);
     193    z = C[0] * r + C[1];
     194    r2 = r * r;
     195    y = C[2] * r + 1;
     196    y = z * r2 + y;
     197    y = y * s;
     198    return (float) y;
     199  }
     200  #ifndef __exp10f
     201  strong_alias (__exp10f, __ieee754_exp10f)
     202  libm_alias_finite (__ieee754_exp10f, __exp10f)
     203  /* For architectures that already provided exp10f without SVID support, there
     204     is no need to add a new version.  */
     205  #if !LIBM_SVID_COMPAT
     206  # define EXP10F_VERSION GLIBC_2_26
     207  #else
     208  # define EXP10F_VERSION GLIBC_2_32
     209  #endif
     210  versioned_symbol (libm, __exp10f, exp10f, EXP10F_VERSION);
     211  libm_alias_float_other (__exp10, exp10)
     212  #endif