1  /* @(#)s_log1p.c 5.1 93/09/24 */
       2  /*
       3   * ====================================================
       4   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
       5   *
       6   * Developed at SunPro, a Sun Microsystems, Inc. business.
       7   * Permission to use, copy, modify, and distribute this
       8   * software is freely granted, provided that this notice
       9   * is preserved.
      10   * ====================================================
      11   */
      12  /* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
      13     for performance improvement on pipelined processors.
      14   */
      15  
      16  /* double log1p(double x)
      17   *
      18   * Method :
      19   *   1. Argument Reduction: find k and f such that
      20   *			1+x = 2^k * (1+f),
      21   *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
      22   *
      23   *      Note. If k=0, then f=x is exact. However, if k!=0, then f
      24   *	may not be representable exactly. In that case, a correction
      25   *	term is need. Let u=1+x rounded. Let c = (1+x)-u, then
      26   *	log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
      27   *	and add back the correction term c/u.
      28   *	(Note: when x > 2**53, one can simply return log(x))
      29   *
      30   *   2. Approximation of log1p(f).
      31   *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
      32   *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
      33   *		 = 2s + s*R
      34   *      We use a special Reme algorithm on [0,0.1716] to generate
      35   *	a polynomial of degree 14 to approximate R The maximum error
      36   *	of this polynomial approximation is bounded by 2**-58.45. In
      37   *	other words,
      38   *			2      4      6      8      10      12      14
      39   *	    R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
      40   *	(the values of Lp1 to Lp7 are listed in the program)
      41   *	and
      42   *	    |      2          14          |     -58.45
      43   *	    | Lp1*s +...+Lp7*s    -  R(z) | <= 2
      44   *	    |                             |
      45   *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
      46   *	In order to guarantee error in log below 1ulp, we compute log
      47   *	by
      48   *		log1p(f) = f - (hfsq - s*(hfsq+R)).
      49   *
      50   *	3. Finally, log1p(x) = k*ln2 + log1p(f).
      51   *			     = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
      52   *	   Here ln2 is split into two floating point number:
      53   *			ln2_hi + ln2_lo,
      54   *	   where n*ln2_hi is always exact for |n| < 2000.
      55   *
      56   * Special cases:
      57   *	log1p(x) is NaN with signal if x < -1 (including -INF) ;
      58   *	log1p(+INF) is +INF; log1p(-1) is -INF with signal;
      59   *	log1p(NaN) is that NaN with no signal.
      60   *
      61   * Accuracy:
      62   *	according to an error analysis, the error is always less than
      63   *	1 ulp (unit in the last place).
      64   *
      65   * Constants:
      66   * The hexadecimal values are the intended ones for the following
      67   * constants. The decimal values may be used, provided that the
      68   * compiler will convert from decimal to binary accurately enough
      69   * to produce the hexadecimal values shown.
      70   *
      71   * Note: Assuming log() return accurate answer, the following
      72   *	 algorithm can be used to compute log1p(x) to within a few ULP:
      73   *
      74   *		u = 1+x;
      75   *		if(u==1.0) return x ; else
      76   *			   return log(u)*(x/(u-1.0));
      77   *
      78   *	 See HP-15C Advanced Functions Handbook, p.193.
      79   */
      80  
      81  #include <float.h>
      82  #include <math.h>
      83  #include <math-barriers.h>
      84  #include <math_private.h>
      85  #include <math-underflow.h>
      86  #include <libc-diag.h>
      87  
      88  static const double
      89    ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
      90    ln2_lo = 1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */
      91    two54 = 1.80143985094819840000e+16,   /* 43500000 00000000 */
      92    Lp[] = { 0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
      93  	   3.999999999940941908e-01, /* 3FD99999 9997FA04 */
      94  	   2.857142874366239149e-01, /* 3FD24924 94229359 */
      95  	   2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
      96  	   1.818357216161805012e-01, /* 3FC74664 96CB03DE */
      97  	   1.531383769920937332e-01, /* 3FC39A09 D078C69F */
      98  	   1.479819860511658591e-01 }; /* 3FC2F112 DF3E5244 */
      99  
     100  static const double zero = 0.0;
     101  
     102  double
     103  __log1p (double x)
     104  {
     105    double hfsq, f, c, s, z, R, u, z2, z4, z6, R1, R2, R3, R4;
     106    int32_t k, hx, hu, ax;
     107  
     108    GET_HIGH_WORD (hx, x);
     109    ax = hx & 0x7fffffff;
     110  
     111    k = 1;
     112    if (hx < 0x3FDA827A)                          /* x < 0.41422  */
     113      {
     114        if (__glibc_unlikely (ax >= 0x3ff00000))           /* x <= -1.0 */
     115  	{
     116  	  if (x == -1.0)
     117  	    return -two54 / zero;               /* log1p(-1)=-inf */
     118  	  else
     119  	    return (x - x) / (x - x);           /* log1p(x<-1)=NaN */
     120  	}
     121        if (__glibc_unlikely (ax < 0x3e200000))           /* |x| < 2**-29 */
     122  	{
     123  	  math_force_eval (two54 + x);          /* raise inexact */
     124  	  if (ax < 0x3c900000)                  /* |x| < 2**-54 */
     125  	    {
     126  	      math_check_force_underflow (x);
     127  	      return x;
     128  	    }
     129  	  else
     130  	    return x - x * x * 0.5;
     131  	}
     132        if (hx > 0 || hx <= ((int32_t) 0xbfd2bec3))
     133  	{
     134  	  k = 0; f = x; hu = 1;
     135  	}                       /* -0.2929<x<0.41422 */
     136      }
     137    else if (__glibc_unlikely (hx >= 0x7ff00000))
     138      return x + x;
     139    if (k != 0)
     140      {
     141        if (hx < 0x43400000)
     142  	{
     143  	  u = 1.0 + x;
     144  	  GET_HIGH_WORD (hu, u);
     145  	  k = (hu >> 20) - 1023;
     146  	  c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
     147  	  c /= u;
     148  	}
     149        else
     150  	{
     151  	  u = x;
     152  	  GET_HIGH_WORD (hu, u);
     153  	  k = (hu >> 20) - 1023;
     154  	  c = 0;
     155  	}
     156        hu &= 0x000fffff;
     157        if (hu < 0x6a09e)
     158  	{
     159  	  SET_HIGH_WORD (u, hu | 0x3ff00000);   /* normalize u */
     160  	}
     161        else
     162  	{
     163  	  k += 1;
     164  	  SET_HIGH_WORD (u, hu | 0x3fe00000);   /* normalize u/2 */
     165  	  hu = (0x00100000 - hu) >> 2;
     166  	}
     167        f = u - 1.0;
     168      }
     169    hfsq = 0.5 * f * f;
     170    if (hu == 0)          /* |f| < 2**-20 */
     171      {
     172        if (f == zero)
     173  	{
     174  	  if (k == 0)
     175  	    return zero;
     176  	  else
     177  	    {
     178  	      c += k * ln2_lo; return k * ln2_hi + c;
     179  	    }
     180  	}
     181        R = hfsq * (1.0 - 0.66666666666666666 * f);
     182        if (k == 0)
     183  	return f - R;
     184        else
     185  	return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
     186      }
     187    s = f / (2.0 + f);
     188    z = s * s;
     189    R1 = z * Lp[1]; z2 = z * z;
     190    R2 = Lp[2] + z * Lp[3]; z4 = z2 * z2;
     191    R3 = Lp[4] + z * Lp[5]; z6 = z4 * z2;
     192    R4 = Lp[6] + z * Lp[7];
     193    R = R1 + z2 * R2 + z4 * R3 + z6 * R4;
     194    if (k == 0)
     195      return f - (hfsq - s * (hfsq + R));
     196    else
     197      {
     198        /* With GCC 7 when compiling with -Os the compiler warns that c
     199  	 might be used uninitialized.  This can't be true because k
     200  	 must be 0 for c to be uninitialized and we handled that
     201  	 computation earlier without using c.  */
     202        DIAG_PUSH_NEEDS_COMMENT;
     203        DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized");
     204        return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
     205        DIAG_POP_NEEDS_COMMENT;
     206      }
     207  }