1  /* Utilities for Advanced SIMD libmvec routines.
       2     Copyright (C) 2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifndef _V_MATH_H
      20  #define _V_MATH_H
      21  
      22  #include <arm_neon.h>
      23  #include "vecmath_config.h"
      24  
      25  #define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
      26  
      27  #define V_NAME_F1(fun) _ZGVnN4v_##fun##f
      28  #define V_NAME_D1(fun) _ZGVnN2v_##fun
      29  #define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
      30  #define V_NAME_D2(fun) _ZGVnN2vv_##fun
      31  
      32  /* Shorthand helpers for declaring constants.  */
      33  #define V2(x)                                                                  \
      34    {                                                                            \
      35      x, x                                                                       \
      36    }
      37  
      38  #define V4(x)                                                                  \
      39    {                                                                            \
      40      x, x, x, x                                                                 \
      41    }
      42  
      43  static inline float32x4_t
      44  v_f32 (float x)
      45  {
      46    return (float32x4_t) V4 (x);
      47  }
      48  static inline uint32x4_t
      49  v_u32 (uint32_t x)
      50  {
      51    return (uint32x4_t) V4 (x);
      52  }
      53  static inline int32x4_t
      54  v_s32 (int32_t x)
      55  {
      56    return (int32x4_t) V4 (x);
      57  }
      58  
      59  /* true if any elements of a vector compare result is non-zero.  */
      60  static inline int
      61  v_any_u32 (uint32x4_t x)
      62  {
      63    /* assume elements in x are either 0 or -1u.  */
      64    return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
      65  }
      66  static inline float32x4_t
      67  v_lookup_f32 (const float *tab, uint32x4_t idx)
      68  {
      69    return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
      70  }
      71  static inline uint32x4_t
      72  v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
      73  {
      74    return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
      75  }
      76  static inline float32x4_t
      77  v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
      78  {
      79    return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
      80  			p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
      81  }
      82  static inline float32x4_t
      83  v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
      84  	     float32x4_t y, uint32x4_t p)
      85  {
      86    return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
      87  			p[1] ? f (x1[1], x2[1]) : y[1],
      88  			p[2] ? f (x1[2], x2[2]) : y[2],
      89  			p[3] ? f (x1[3], x2[3]) : y[3] };
      90  }
      91  
      92  static inline float64x2_t
      93  v_f64 (double x)
      94  {
      95    return (float64x2_t) V2 (x);
      96  }
      97  static inline uint64x2_t
      98  v_u64 (uint64_t x)
      99  {
     100    return (uint64x2_t) V2 (x);
     101  }
     102  static inline int64x2_t
     103  v_s64 (int64_t x)
     104  {
     105    return (int64x2_t) V2 (x);
     106  }
     107  
     108  /* true if any elements of a vector compare result is non-zero.  */
     109  static inline int
     110  v_any_u64 (uint64x2_t x)
     111  {
     112    /* assume elements in x are either 0 or -1u.  */
     113    return vpaddd_u64 (x) != 0;
     114  }
     115  /* true if all elements of a vector compare result is 1.  */
     116  static inline int
     117  v_all_u64 (uint64x2_t x)
     118  {
     119    /* assume elements in x are either 0 or -1u.  */
     120    return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
     121  }
     122  static inline float64x2_t
     123  v_lookup_f64 (const double *tab, uint64x2_t idx)
     124  {
     125    return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
     126  }
     127  static inline uint64x2_t
     128  v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
     129  {
     130    return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
     131  }
     132  static inline float64x2_t
     133  v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
     134  {
     135    return (float64x2_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1] };
     136  }
     137  static inline float64x2_t
     138  v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
     139  	     float64x2_t y, uint64x2_t p)
     140  {
     141    return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0],
     142  			p[1] ? f (x1[1], x2[1]) : y[1] };
     143  }
     144  
     145  #endif