1  /* Single-precision vector (SVE) sin function.
       2  
       3     Copyright (C) 2023 Free Software Foundation, Inc.
       4     This file is part of the GNU C Library.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <https://www.gnu.org/licenses/>.  */
      19  
      20  #include "sv_math.h"
      21  
      22  static const struct data
      23  {
      24    float poly[4];
      25    /* Pi-related values to be loaded as one quad-word and used with
      26       svmla_lane_f32.  */
      27    float negpi1, negpi2, negpi3, invpi;
      28    float shift;
      29  } data = {
      30    .poly = {
      31      /* Non-zero coefficients from the degree 9 Taylor series expansion of
      32         sin.  */
      33      -0x1.555548p-3f, 0x1.110df4p-7f, -0x1.9f42eap-13f, 0x1.5b2e76p-19f
      34    },
      35    .negpi1 = -0x1.921fb6p+1f,
      36    .negpi2 = 0x1.777a5cp-24f,
      37    .negpi3 = 0x1.ee59dap-49f,
      38    .invpi = 0x1.45f306p-2f,
      39    .shift = 0x1.8p+23f
      40  };
      41  
      42  #define RangeVal 0x49800000 /* asuint32 (0x1p20f).  */
      43  #define C(i) sv_f32 (d->poly[i])
      44  
      45  static svfloat32_t NOINLINE
      46  special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
      47  {
      48    return sv_call_f32 (sinf, x, y, cmp);
      49  }
      50  
      51  /* A fast SVE implementation of sinf.
      52     Maximum error: 1.89 ULPs.
      53     This maximum error is achieved at multiple values in [-2^18, 2^18]
      54     but one example is:
      55     SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1.  */
      56  svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
      57  {
      58    const struct data *d = ptr_barrier (&data);
      59  
      60    svfloat32_t ax = svabs_f32_x (pg, x);
      61    svuint32_t sign = sveor_u32_x (pg, svreinterpret_u32_f32 (x),
      62  				 svreinterpret_u32_f32 (ax));
      63    svbool_t cmp = svcmpge_n_u32 (pg, svreinterpret_u32_f32 (ax), RangeVal);
      64  
      65    /* pi_vals are a quad-word of helper values - the first 3 elements contain
      66       -pi in extended precision, the last contains 1 / pi.  */
      67    svfloat32_t pi_vals = svld1rq_f32 (svptrue_b32 (), &d->negpi1);
      68  
      69    /* n = rint(|x|/pi).  */
      70    svfloat32_t n = svmla_lane_f32 (sv_f32 (d->shift), ax, pi_vals, 3);
      71    svuint32_t odd = svlsl_n_u32_x (pg, svreinterpret_u32_f32 (n), 31);
      72    n = svsub_n_f32_x (pg, n, d->shift);
      73  
      74    /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2).  */
      75    svfloat32_t r;
      76    r = svmla_lane_f32 (ax, n, pi_vals, 0);
      77    r = svmla_lane_f32 (r, n, pi_vals, 1);
      78    r = svmla_lane_f32 (r, n, pi_vals, 2);
      79  
      80    /* sin(r) approx using a degree 9 polynomial from the Taylor series
      81       expansion. Note that only the odd terms of this are non-zero.  */
      82    svfloat32_t r2 = svmul_f32_x (pg, r, r);
      83    svfloat32_t y;
      84    y = svmla_f32_x (pg, C (2), r2, C (3));
      85    y = svmla_f32_x (pg, C (1), r2, y);
      86    y = svmla_f32_x (pg, C (0), r2, y);
      87    y = svmla_f32_x (pg, r, r, svmul_f32_x (pg, y, r2));
      88  
      89    /* sign = y^sign^odd.  */
      90    y = svreinterpret_f32_u32 (sveor_u32_x (pg, svreinterpret_u32_f32 (y),
      91  					  sveor_u32_x (pg, sign, odd)));
      92  
      93    if (__glibc_unlikely (svptest_any (pg, cmp)))
      94      return special_case (x, y, cmp);
      95    return y;
      96  }