1 /* Utilities for Advanced SIMD libmvec routines.
2 Copyright (C) 2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #ifndef _V_MATH_H
20 #define _V_MATH_H
21
22 #include <arm_neon.h>
23 #include "vecmath_config.h"
24
/* Function attribute shorthand: expands to the GNU attribute specifier
   naming `aarch64_vector_pcs'.
   NOTE(review): presumably applied to the vector entry points so that they
   use the AArch64 vector procedure call standard -- confirm at the use
   sites, which are outside this header.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
26
/* Build the symbol name of a vector routine from the scalar base name FUN
   by token pasting:

     V_NAME_F1 (fun) -> _ZGVnN4v_<fun>f     V_NAME_D1 (fun) -> _ZGVnN2v_<fun>
     V_NAME_F2 (fun) -> _ZGVnN4vv_<fun>f    V_NAME_D2 (fun) -> _ZGVnN2vv_<fun>

   The F variants use a "4" in the prefix and append an `f' suffix, the D
   variants use a "2" and no suffix; the *2 variants carry "vv" instead of
   "v".  FUN is an operand of ##, so it is pasted as written and is not
   macro-expanded first.
   NOTE(review): presumably F/D select single/double precision (4 or 2
   lanes) and 1/2 the number of vector arguments, following the AArch64
   vector function ABI name mangling -- confirm against the ABI document.  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun
31
/* Shorthand helpers for declaring constants.
   V2 (x) expands to a brace-enclosed initializer that repeats X twice.
   X is substituted once per element, so it should have no side effects.  */
#define V2(x) { x, x }
37
/* V4 (x) expands to a brace-enclosed initializer that repeats X four times.
   X is substituted once per element, so it should have no side effects.  */
#define V4(x) { x, x, x, x }
42
/* Return a float32x4_t with the scalar X replicated in all four lanes.  */
static inline float32x4_t
v_f32 (float x)
{
  return vdupq_n_f32 (x);
}
/* Return a uint32x4_t with the scalar X replicated in all four lanes.  */
static inline uint32x4_t
v_u32 (uint32_t x)
{
  return vdupq_n_u32 (x);
}
/* Return an int32x4_t with the scalar X replicated in all four lanes.  */
static inline int32x4_t
v_s32 (int32_t x)
{
  return vdupq_n_s32 (x);
}
58
/* Return nonzero if any lane of the comparison result X is set.

   Every lane of X is assumed to be either 0 or all-ones (-1u).  The test
   views X as two 64-bit halves and adds them; the zero/nonzero answer is
   only meaningful for such masks, because arbitrary lane values could add
   up to zero modulo 2^64.  */
static inline int
v_any_u32 (uint32x4_t x)
{
  uint64x2_t halves = vreinterpretq_u64_u32 (x);
  uint64_t sum = vpaddd_u64 (halves);
  return sum != 0;
}
/* Gather four floats from TAB: lane i of the result is TAB[IDX[i]].
   The indices are used as given; no bounds checking is performed.  */
static inline float32x4_t
v_lookup_f32 (const float *tab, uint32x4_t idx)
{
  const float e0 = tab[idx[0]];
  const float e1 = tab[idx[1]];
  const float e2 = tab[idx[2]];
  const float e3 = tab[idx[3]];
  return (float32x4_t){ e0, e1, e2, e3 };
}
/* Gather four 32-bit words from TAB: lane i of the result is TAB[IDX[i]].
   The indices are used as given; no bounds checking is performed.  */
static inline uint32x4_t
v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
{
  const uint32_t e0 = tab[idx[0]];
  const uint32_t e1 = tab[idx[1]];
  const uint32_t e2 = tab[idx[2]];
  const uint32_t e3 = tab[idx[3]];
  return (uint32x4_t){ e0, e1, e2, e3 };
}
/* Per-lane scalar fallback.  For each lane i, the result is F (X[i]) when
   the predicate lane P[i] is nonzero and Y[i] otherwise.  F is only called
   for lanes whose predicate is set.  */
static inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float32x4_t out = y;
  if (p[0])
    out[0] = f (x[0]);
  if (p[1])
    out[1] = f (x[1]);
  if (p[2])
    out[2] = f (x[2]);
  if (p[3])
    out[3] = f (x[3]);
  return out;
}
/* Per-lane scalar fallback for two-argument functions.  For each lane i,
   the result is F (X1[i], X2[i]) when the predicate lane P[i] is nonzero
   and Y[i] otherwise.  F is only called for lanes whose predicate is set.  */
static inline float32x4_t
v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	     float32x4_t y, uint32x4_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float32x4_t out = y;
  if (p[0])
    out[0] = f (x1[0], x2[0]);
  if (p[1])
    out[1] = f (x1[1], x2[1]);
  if (p[2])
    out[2] = f (x1[2], x2[2]);
  if (p[3])
    out[3] = f (x1[3], x2[3]);
  return out;
}
91
/* Return a float64x2_t with the scalar X replicated in both lanes.  */
static inline float64x2_t
v_f64 (double x)
{
  return vdupq_n_f64 (x);
}
/* Return a uint64x2_t with the scalar X replicated in both lanes.  */
static inline uint64x2_t
v_u64 (uint64_t x)
{
  return vdupq_n_u64 (x);
}
/* Return an int64x2_t with the scalar X replicated in both lanes.  */
static inline int64x2_t
v_s64 (int64_t x)
{
  return vdupq_n_s64 (x);
}
107
/* Return nonzero if any lane of the comparison result X is set.

   Both lanes of X are assumed to be either 0 or all-ones (-1).  The test
   adds the two lanes; the zero/nonzero answer is only meaningful for such
   masks, because arbitrary lane values could add up to zero modulo 2^64.  */
static inline int
v_any_u64 (uint64x2_t x)
{
  uint64_t sum = vpaddd_u64 (x);
  return sum != 0;
}
/* Return nonzero if every lane of the comparison result X is set, i.e. is
   all-ones (-1).

   Both lanes of X are assumed to be either 0 or all-ones.  Under that
   assumption the lanes add up to -2 (modulo 2^64) exactly when both are
   set.  */
static inline int
v_all_u64 (uint64x2_t x)
{
  /* (uint64_t) -2 has the same bit pattern as the signed sum -1 + -1.  */
  return vpaddd_u64 (x) == (uint64_t) -2;
}
/* Gather two doubles from TAB: lane i of the result is TAB[IDX[i]].
   The indices are used as given; no bounds checking is performed.  */
static inline float64x2_t
v_lookup_f64 (const double *tab, uint64x2_t idx)
{
  const double e0 = tab[idx[0]];
  const double e1 = tab[idx[1]];
  return (float64x2_t){ e0, e1 };
}
/* Gather two 64-bit words from TAB: lane i of the result is TAB[IDX[i]].
   The indices are used as given; no bounds checking is performed.  */
static inline uint64x2_t
v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
{
  const uint64_t e0 = tab[idx[0]];
  const uint64_t e1 = tab[idx[1]];
  return (uint64x2_t){ e0, e1 };
}
/* Per-lane scalar fallback.  For each lane i, the result is F (X[i]) when
   the predicate lane P[i] is nonzero and Y[i] otherwise.  F is only called
   for lanes whose predicate is set.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float64x2_t out = y;
  if (p[0])
    out[0] = f (x[0]);
  if (p[1])
    out[1] = f (x[1]);
  return out;
}
/* Per-lane scalar fallback for two-argument functions.  For each lane i,
   the result is F (X1[i], X2[i]) when the predicate lane P[i] is nonzero
   and Y[i] otherwise.  F is only called for lanes whose predicate is set.  */
static inline float64x2_t
v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
	     float64x2_t y, uint64x2_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float64x2_t out = y;
  if (p[0])
    out[0] = f (x1[0], x2[0]);
  if (p[1])
    out[1] = f (x1[1], x2[1]);
  return out;
}
144
145 #endif