1  /* Double-precision vector (SVE) log function.
       2  
       3     Copyright (C) 2023 Free Software Foundation, Inc.
       4     This file is part of the GNU C Library.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <https://www.gnu.org/licenses/>.  */
      19  
      20  #include "sv_math.h"
      21  
      22  #define P(i) sv_f64 (__v_log_data.poly[i])
      23  #define N (1 << V_LOG_TABLE_BITS)
      24  #define Off (0x3fe6900900000000)
      25  #define MaxTop (0x7ff)
      26  #define MinTop (0x001)
      27  #define ThreshTop (0x7fe) /* MaxTop - MinTop.  */
      28  
      29  static svfloat64_t NOINLINE
      30  special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
      31  {
      32    return sv_call_f64 (log, x, y, cmp);
      33  }
      34  
      35  /* SVE port of AdvSIMD log algorithm.
      36     Maximum measured error is 2.17 ulp:
      37     SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
      38  					 want 0x1.ffffff1cca045p-2.  */
      39  svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
      40  {
      41    svuint64_t ix = svreinterpret_u64_f64 (x);
      42    svuint64_t top = svlsr_n_u64_x (pg, ix, 52);
      43    svbool_t cmp
      44        = svcmpge_u64 (pg, svsub_n_u64_x (pg, top, MinTop), sv_u64 (ThreshTop));
      45  
      46    /* x = 2^k z; where z is in range [Off,2*Off) and exact.
      47       The range is split into N subintervals.
      48       The ith subinterval contains z and c is near its center.  */
      49    svuint64_t tmp = svsub_n_u64_x (pg, ix, Off);
      50    /* Equivalent to (tmp >> (52 - V_LOG_TABLE_BITS)) % N, since N is a power
      51       of 2.  */
      52    svuint64_t i = svand_n_u64_x (
      53        pg, svlsr_n_u64_x (pg, tmp, (52 - V_LOG_TABLE_BITS)), N - 1);
      54    svint64_t k = svasr_n_s64_x (pg, svreinterpret_s64_u64 (tmp),
      55  			       52); /* Arithmetic shift.  */
      56    svuint64_t iz
      57        = svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
      58    svfloat64_t z = svreinterpret_f64_u64 (iz);
      59    /* Lookup in 2 global lists (length N).  */
      60    svfloat64_t invc = svld1_gather_u64index_f64 (pg, __v_log_data.invc, i);
      61    svfloat64_t logc = svld1_gather_u64index_f64 (pg, __v_log_data.logc, i);
      62  
      63    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
      64    svfloat64_t r = svmad_n_f64_x (pg, invc, z, -1);
      65    svfloat64_t kd = svcvt_f64_s64_x (pg, k);
      66    /* hi = r + log(c) + k*Ln2.  */
      67    svfloat64_t hi
      68        = svmla_n_f64_x (pg, svadd_f64_x (pg, logc, r), kd, __v_log_data.ln2);
      69    /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
      70    svfloat64_t r2 = svmul_f64_x (pg, r, r);
      71    svfloat64_t y = svmla_f64_x (pg, P (2), r, P (3));
      72    svfloat64_t p = svmla_f64_x (pg, P (0), r, P (1));
      73    y = svmla_f64_x (pg, y, r2, P (4));
      74    y = svmla_f64_x (pg, p, r2, y);
      75    y = svmla_f64_x (pg, hi, r2, y);
      76  
      77    if (__glibc_unlikely (svptest_any (pg, cmp)))
      78      return special_case (x, y, cmp);
      79    return y;
      80  }