/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */

#include <arm_neon.h>

/* Store X in both elements of a local int32_t array and load it with
   vld1_s32.  The expected body below is a single DUP of w0 into the
   two 32-bit lanes of v0, followed by RET.  */
/*
** s32x2_1:
**	dup	v0\.2s, w0
**	ret
*/
int32x2_t
s32x2_1 (int32_t x)
{
  int32_t arr[] = { x, x };
  return vld1_s32 (arr);
}

/* Load { x, 0 } from a local int32_t array with vld1_s32.  The expected
   body below contains only an FMOV of w0 into s0 and RET; no separate
   instruction is expected for the zero element.  */
/*
** s32x2_2:
**	fmov	s0, w0
**	ret
*/
int32x2_t
s32x2_2 (int32_t x)
{
  int32_t arr[] = { x, 0 };
  return vld1_s32 (arr);
}

/* Load { x, y } from a local int32_t array with vld1_s32.  The expected
   body below is an FMOV of w0 into s0 followed by an INS of w1 into
   lane 1 of v0.  */
/*
** s32x2_3:
**	fmov	s0, w0
**	ins	v0\.s\[1\], w1
**	ret
*/
int32x2_t
s32x2_3 (int32_t x, int32_t y)
{
  int32_t arr[] = { x, y };
  return vld1_s32 (arr);
}

/* Store X in both elements of a local float32_t array and load it with
   vld1_f32.  The expected body below is a single DUP of lane 0 of v0
   into both 32-bit lanes of v0.  */
/*
** f32x2_1:
**	dup	v0\.2s, v0\.s\[0\]
**	ret
*/
float32x2_t
f32x2_1 (float32_t x)
{
  float32_t arr[] = { x, x };
  return vld1_f32 (arr);
}

/* Load { x, y } from a local float32_t array with vld1_f32.  The
   expected body below is a single INS copying lane 0 of v1 into lane 1
   of v0.  */
/*
** f32x2_2:
**	ins	v0\.s\[1\], v1\.s\[0\]
**	ret
*/
float32x2_t
f32x2_2 (float32_t x, float32_t y)
{
  float32_t arr[] = { x, y };
  return vld1_f32 (arr);
}

/* Store X in all four elements of a local int16_t array and load it
   with vld1_s16.  The expected body below is a single DUP of w0 into
   the four 16-bit lanes of v0.  */
/*
** s16x4_1:
**	dup	v0\.4h, w0
**	ret
*/
int16x4_t
s16x4_1 (int16_t x)
{
  int16_t arr[] = { x, x, x, x };
  return vld1_s16 (arr);
}

/* Load { x, 0, 0, 0 } from a local int16_t array with vld1_s16.  The
   expected body below allows arbitrary leading instructions ("...")
   but must finish with an FMOV from a w or x register into d0, s0 or
   h0, followed by RET.  */
/*
** s16x4_2:
**	...
**	fmov	[dsh]0, [wx][0-9]+
**	ret
*/
int16x4_t
s16x4_2 (int16_t x)
{
  int16_t arr[] = { x, 0, 0, 0 };
  return vld1_s16 (arr);
}

/* Load { x, y, y, y } from a local int16_t array with vld1_s16.  The
   expected body below is a DUP of w1 into all four 16-bit lanes of v0
   followed by an INS of w0 into lane 0.  */
/*
** s16x4_3:
**	dup	v0\.4h, w1
**	ins	v0\.h\[0\], w0
**	ret
*/
int16x4_t
s16x4_3 (int16_t x, int16_t y)
{
  int16_t arr[] = { x, y, y, y };
  return vld1_s16 (arr);
}

/* Store X in all four elements of a local float16_t array and load it
   with vld1_f16.  The expected body below is a single DUP of lane 0 of
   v0 into the four 16-bit lanes of v0.  */
/*
** f16x4_1:
**	dup	v0\.4h, v0\.h\[0\]
**	ret
*/
float16x4_t
f16x4_1 (float16_t x)
{
  float16_t arr[] = { x, x, x, x };
  return vld1_f16 (arr);
}

/* Store X in both elements of a local int64_t array and load it with
   vld1q_s64.  The expected body below is a single DUP of x0 into the
   two 64-bit lanes of v0.  */
/*
** s64x2_1:
**	dup	v0\.2d, x0
**	ret
*/
int64x2_t
s64x2_1 (int64_t x)
{
  int64_t arr[] = { x, x };
  return vld1q_s64 (arr);
}

/* Load { x, 0 } from a local int64_t array with vld1q_s64.  The body
   below (a lone FMOV of x0 into d0) is the desired output, but the
   check carries an xfail selector for every target, so it is recorded
   as an expected failure.  */
/*
** s64x2_2: { xfail *-*-* }
**	fmov	d0, x0
**	ret
*/
int64x2_t
s64x2_2 (int64_t x)
{
  int64_t arr[] = { x, 0 };
  return vld1q_s64 (arr);
}

/* Load { x, y } from a local int64_t array with vld1q_s64.  The
   expected body below is an FMOV of x0 into d0 followed by an INS of
   x1 into lane 1 of v0.  */
/*
** s64x2_3:
**	fmov	d0, x0
**	ins	v0\.d\[1\], x1
**	ret
*/
int64x2_t
s64x2_3 (int64_t x, int64_t y)
{
  int64_t arr[] = { x, y };
  return vld1q_s64 (arr);
}

/* Store X in both elements of a local float64_t array and load it with
   vld1q_f64.  The expected body below is a single DUP of lane 0 of v0
   into the two 64-bit lanes of v0.  */
/*
** f64x2_1:
**	dup	v0\.2d, v0\.d\[0\]
**	ret
*/
float64x2_t
f64x2_1 (float64_t x)
{
  float64_t arr[] = { x, x };
  return vld1q_f64 (arr);
}

/* Load { x, y } from a local float64_t array with vld1q_f64.  The
   expected body below is a single INS copying lane 0 of v1 into lane 1
   of v0.  */
/*
** f64x2_2:
**	ins	v0\.d\[1\], v1\.d\[0\]
**	ret
*/
float64x2_t
f64x2_2 (float64_t x, float64_t y)
{
  float64_t arr[] = { x, y };
  return vld1q_f64 (arr);
}

/* Store X in all four elements of a local int32_t array and load it
   with vld1q_s32.  The expected body below is a single DUP of w0 into
   the four 32-bit lanes of v0.  */
/*
** s32x4_1:
**	dup	v0\.4s, w0
**	ret
*/
int32x4_t
s32x4_1 (int32_t x)
{
  int32_t arr[] = { x, x, x, x };
  return vld1q_s32 (arr);
}

/* Load { x, 0, 0, 0 } from a local int32_t array with vld1q_s32.  The
   body below (a lone FMOV of w0 into s0) is the desired output, but
   the check carries an xfail selector for every target, so it is
   recorded as an expected failure.  */
/*
** s32x4_2: { xfail *-*-* }
**	fmov	s0, w0
**	ret
*/
int32x4_t
s32x4_2 (int32_t x)
{
  int32_t arr[] = { x, 0, 0, 0 };
  return vld1q_s32 (arr);
}

/* Load { x, y, y, y } from a local int32_t array with vld1q_s32.  The
   expected body below is a DUP of w1 into all four 32-bit lanes of v0
   followed by an INS of w0 into lane 0.  */
/*
** s32x4_3:
**	dup	v0\.4s, w1
**	ins	v0\.s\[0\], w0
**	ret
*/
int32x4_t
s32x4_3 (int32_t x, int32_t y)
{
  int32_t arr[] = { x, y, y, y };
  return vld1q_s32 (arr);
}

/* Store X in all four elements of a local float32_t array and load it
   with vld1q_f32.  The expected body below is a single DUP of lane 0
   of v0 into the four 32-bit lanes of v0.  */
/*
** f32x4_1:
**	dup	v0\.4s, v0\.s\[0\]
**	ret
*/
float32x4_t
f32x4_1 (float32_t x)
{
  float32_t arr[] = { x, x, x, x };
  return vld1q_f32 (arr);
}

/* External sink for four float32x4_t vectors.  Only the prototype is
   given here; produce_1 and produce_2 pass their loaded vectors to it.  */
void consume (float32x4_t, float32x4_t, float32x4_t, float32x4_t);

/* Fill a 4x4 float array so that row N holds four copies of the Nth
   argument, load each row with vld1q_f32 and pass the four vectors to
   consume.  The expected body below is four lane-0 DUPs on v0-v3, in
   either ascending or descending register order, followed by a direct
   branch to consume.  */
/*
** produce_1:
** (
**	dup	v0\.4s, v0\.s\[0\]
**	dup	v1\.4s, v1\.s\[0\]
**	dup	v2\.4s, v2\.s\[0\]
**	dup	v3\.4s, v3\.s\[0\]
** |
**	dup	v3\.4s, v3\.s\[0\]
**	dup	v2\.4s, v2\.s\[0\]
**	dup	v1\.4s, v1\.s\[0\]
**	dup	v0\.4s, v0\.s\[0\]
** )
**	b	consume
*/
void
produce_1 (float32_t a, float32_t b, float32_t c, float32_t d)
{
  float arr[4][4] = {
    { a, a, a, a },
    { b, b, b, b },
    { c, c, c, c },
    { d, d, d, d }
  };
  consume (vld1q_f32 (arr[0]), vld1q_f32 (arr[1]),
	   vld1q_f32 (arr[2]), vld1q_f32 (arr[3]));
}

/* Same data as a 4x4 splat matrix, but built from four separate
   one-dimensional float arrays, each holding four copies of one
   argument.  Each array is loaded with vld1q_f32 and the four vectors
   are passed to consume.  The expected body below is four lane-0 DUPs
   on v0-v3, in either ascending or descending register order, followed
   by a direct branch to consume.  */
/*
** produce_2:
** (
**	dup	v0\.4s, v0\.s\[0\]
**	dup	v1\.4s, v1\.s\[0\]
**	dup	v2\.4s, v2\.s\[0\]
**	dup	v3\.4s, v3\.s\[0\]
** |
**	dup	v3\.4s, v3\.s\[0\]
**	dup	v2\.4s, v2\.s\[0\]
**	dup	v1\.4s, v1\.s\[0\]
**	dup	v0\.4s, v0\.s\[0\]
** )
**	b	consume
*/
void
produce_2 (float32_t a, float32_t b, float32_t c, float32_t d)
{
  float arr0[] = { a, a, a, a };
  float arr1[] = { b, b, b, b };
  float arr2[] = { c, c, c, c };
  float arr3[] = { d, d, d, d };
  consume (vld1q_f32 (arr0), vld1q_f32 (arr1),
	   vld1q_f32 (arr2), vld1q_f32 (arr3));
}