1 /* { dg-do run } */
2 /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
3
4 #pragma GCC target "+nosve"
5
/* Element types used by the tests, each followed by the restrict-qualified
   pointer type through which the kernels access arrays of that element.
   Signed variants come first, unsigned variants second.  */

typedef signed char S8_t;
typedef S8_t *__restrict__ pS8_t;

typedef signed short S16_t;
typedef S16_t *__restrict__ pS16_t;

typedef signed int S32_t;
typedef S32_t *__restrict__ pS32_t;

typedef signed long long S64_t;
typedef S64_t *__restrict__ pS64_t;

typedef unsigned char U8_t;
typedef U8_t *__restrict__ pU8_t;

typedef unsigned short U16_t;
typedef U16_t *__restrict__ pU16_t;

typedef unsigned int U32_t;
typedef U32_t *__restrict__ pU32_t;

typedef unsigned long long U64_t;
typedef U64_t *__restrict__ pU64_t;
25
26 extern void abort ();
27
28 void
29 test_addl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
30 {
31 int i;
32 for (i = 0; i < 4; i++)
33 a[i] = (S64_t) b[i] + (S64_t) c[i];
34 }
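/* Expected code for test_addl_S64_S32_4.  This is a plain comment rather
   than an active directive: the 64-bit saddl/saddl2 patterns are counted
   once for the whole file (expected count 2) by the scan-assembler-times
   directives that follow test_subl_S64_S32_4_neg0.
   "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */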
37
38 /* a = -b + c => a = c - b */
39 void
40 test_addl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
41 {
42 int i;
43 for (i = 0; i < 4; i++)
44 a[i] = -(S64_t) b[i] + (S64_t) c[i];
45 }
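/* Expected code for test_addl_S64_S32_4_neg0.  This is a plain comment
   rather than an active directive: the 64-bit ssubl/ssubl2 patterns are
   counted once for the whole file (expected count 5) by the
   scan-assembler-times directives that follow test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */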
48
49 /* a = b + -c => a = b - c */
50 void
51 test_addl_S64_S32_4_neg1 (pS64_t a, pS32_t b, pS32_t c)
52 {
53 int i;
54 for (i = 0; i < 4; i++)
55 a[i] = (S64_t) b[i] + -(S64_t) c[i];
56 }
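/* Expected code for test_addl_S64_S32_4_neg1.  This is a plain comment
   rather than an active directive: the 64-bit ssubl/ssubl2 patterns are
   counted once for the whole file (expected count 5) by the
   scan-assembler-times directives that follow test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */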
59
60 void
61 test_addl_S32_S16_8 (pS32_t a, pS16_t b, pS16_t c)
62 {
63 int i;
64 for (i = 0; i < 8; i++)
65 a[i] = (S32_t) b[i] + (S32_t) c[i];
66 }
67 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
68 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
69
70 void
71 test_addl_S16_S8_16 (pS16_t a, pS8_t b, pS8_t c)
72 {
73 int i;
74 for (i = 0; i < 16; i++)
75 a[i] = (S16_t) b[i] + (S16_t) c[i];
76 }
77 /* { dg-final { scan-assembler "saddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
78 /* { dg-final { scan-assembler "saddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
79
80 void
81 test_addl_U64_U32_4 (pU64_t a, pU32_t b, pU32_t c)
82 {
83 int i;
84 for (i = 0; i < 4; i++)
85 a[i] = (U64_t) b[i] + (U64_t) c[i];
86 }
87 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
88 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
89
90 void
91 test_addl_U32_U16_8 (pU32_t a, pU16_t b, pU16_t c)
92 {
93 int i;
94 for (i = 0; i < 8; i++)
95 a[i] = (U32_t) b[i] + (U32_t) c[i];
96 }
97 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
98 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
99
100 void
101 test_addl_U16_U8_16 (pU16_t a, pU8_t b, pU8_t c)
102 {
103 int i;
104 for (i = 0; i < 16; i++)
105 a[i] = (U16_t) b[i] + (U16_t) c[i];
106 }
107 /* { dg-final { scan-assembler "uaddl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
108 /* { dg-final { scan-assembler "uaddl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
109
110 void
111 test_subl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
112 {
113 int i;
114 for (i = 0; i < 4; i++)
115 a[i] = (S64_t) b[i] - (S64_t) c[i];
116 }
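/* Expected code for test_subl_S64_S32_4.  This is a plain comment rather
   than an active directive: the 64-bit ssubl/ssubl2 patterns are counted
   once for the whole file (expected count 5) by the scan-assembler-times
   directives that follow test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */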
119
120 /* a = b - -c => a = b + c */
121 void
122 test_subl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
123 {
124 int i;
125 for (i = 0; i < 4; i++)
126 a[i] = (S64_t) b[i] - -(S64_t) c[i];
127 }
128 /* { dg-final { scan-assembler-times "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */
129 /* { dg-final { scan-assembler-times "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 2 } } */
130
131 /* a = -b - -c => a = c - b */
132 void
133 test_subl_S64_S32_4_neg1 (pS64_t a, pS32_t b, pS32_t c)
134 {
135 int i;
136 for (i = 0; i < 4; i++)
137 a[i] = -(S64_t) b[i] - -(S64_t) c[i];
138 }
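/* Expected code for test_subl_S64_S32_4_neg1.  This is a plain comment
   rather than an active directive: the 64-bit ssubl/ssubl2 patterns are
   counted once for the whole file (expected count 5) by the
   scan-assembler-times directives that follow test_subl_S64_S32_4_neg2.
   "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s"
   "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s"  */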
141
142 /* a = -(b - c) => a = c - b */
143 void
144 test_subl_S64_S32_4_neg2 (pS64_t a, pS32_t b, pS32_t c)
145 {
146 int i;
147 for (i = 0; i < 4; i++)
148 a[i] = -((S64_t) b[i] - (S64_t) c[i]);
149 }
150 /* { dg-final { scan-assembler-times "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 5 } } */
151 /* { dg-final { scan-assembler-times "ssubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" 5 } } */
152
153 void
154 test_subl_S32_S16_8 (pS32_t a, pS16_t b, pS16_t c)
155 {
156 int i;
157 for (i = 0; i < 8; i++)
158 a[i] = (S32_t) b[i] - (S32_t) c[i];
159 }
160 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
161 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
162
163 void
164 test_subl_S16_S8_16 (pS16_t a, pS8_t b, pS8_t c)
165 {
166 int i;
167 for (i = 0; i < 16; i++)
168 a[i] = (S16_t) b[i] - (S16_t) c[i];
169 }
170 /* { dg-final { scan-assembler "ssubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
171 /* { dg-final { scan-assembler "ssubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
172
173 void
174 test_subl_U64_U32_4 (pU64_t a, pU32_t b, pU32_t c)
175 {
176 int i;
177 for (i = 0; i < 4; i++)
178 a[i] = (U64_t) b[i] - (U64_t) c[i];
179 }
180 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
181 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" } } */
182
183 void
184 test_subl_U32_U16_8 (pU32_t a, pU16_t b, pU16_t c)
185 {
186 int i;
187 for (i = 0; i < 8; i++)
188 a[i] = (U32_t) b[i] - (U32_t) c[i];
189 }
190 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.4s,\ v\[0-9\]+\.4h,\ v\[0-9\]+\.4h" } } */
191 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.4s,\ v\[0-9\]+\.8h,\ v\[0-9\]+\.8h" } } */
192
193 void
194 test_subl_U16_U8_16 (pU16_t a, pU8_t b, pU8_t c)
195 {
196 int i;
197 for (i = 0; i < 16; i++)
198 a[i] = (U16_t) b[i] - (U16_t) c[i];
199 }
200 /* { dg-final { scan-assembler "usubl\tv\[0-9\]+\.8h,\ v\[0-9\]+\.8b,\ v\[0-9\]+\.8b" } } */
201 /* { dg-final { scan-assembler "usubl2\tv\[0-9\]+\.8h,\ v\[0-9\]+\.16b,\ v\[0-9\]+\.16b" } } */
202
203 /* input values */
204
205 S64_t S64_ta[4];
206 S32_t S32_tb[4] = { 0, 1, 2, 3 };
207 S32_t S32_tc[4] = { 2, 2, -2, -2 };
208
209 S32_t S32_ta[8];
210 S16_t S16_tb[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
211 S16_t S16_tc[8] = { 2, 2, -2, -2, 2, 2, -2, -2 };
212
213 S16_t S16_ta[16];
214 S8_t S8_tb[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
215 S8_t S8_tc[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
216
217 /* expected output */
218
219 S64_t addl_rS64[] = { 2, 3, 0, 1 };
220 S64_t neg_r[] = { 2, 1, -4, -5 };
221 S32_t addl_rS32[] = { 2, 3, 0, 1, 6, 7, 4, 5 };
222 S16_t addl_rS16[] = { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 };
223 S64_t subl_rS64[] = { -2, -1, 4, 5 };
224 S32_t subl_rS32[] = { -2, -1, 4, 5, 2, 3, 8, 9 };
225 S16_t subl_rS16[] =
226 { -2, -1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 16, 17 };
227 U64_t addl_rU64[] = { 2, 3, 0x100000000, 0x100000001 };
228 U32_t addl_rU32[] = { 2, 3, 0x10000, 0x10001, 6, 7, 0x10004, 0x10005 };
229 U16_t addl_rU16[] =
230 {
231 0x0002, 0x0003, 0x0100, 0x0101, 0x0006, 0x0007, 0x0104, 0x0105,
232 0x000a, 0x000b, 0x0108, 0x0109, 0x000e, 0x000f, 0x010c, 0x010d
233 };
234 U64_t subl_rU64[] =
235 {
236 0xfffffffffffffffe, 0xffffffffffffffff,
237 0xffffffff00000004, 0xffffffff00000005
238 };
239 U32_t subl_rU32[] =
240 {
241 0xfffffffe, 0xffffffff, 0xffff0004, 0xffff0005,
242 0x00000002, 0x00000003, 0xffff0008, 0xffff0009
243 };
244 U16_t subl_rU16[] =
245 {
246 0xfffe, 0xffff, 0xff04, 0xff05, 0x0002, 0x0003, 0xff08, 0xff09,
247 0x0006, 0x0007, 0xff0c, 0xff0d, 0x000a, 0x000b, 0xff10, 0xff11
248 };
249
/* Compare the first N elements of the result buffer S<T>_ta, each cast
   to <US><T>_t, with the expected table <AS>_r<US><T>, and call abort on
   the first mismatch.  T is the element width used in the identifiers,
   AS the table prefix (addl or subl) and US the signedness letter (S or
   U).  The loop index is the variable `i' of the invoking scope, which
   must be declared by the caller.  N is parenthesized so that an
   expression argument (for example a conditional expression) is compared
   against `i' as a whole.  */
#define CHECK(T,N,AS,US)                                        \
  do                                                            \
    {                                                           \
      for (i = 0; i < (N); i++)                                 \
        if ((US##T##_t) S##T##_ta[i] != AS##_##r##US##T[i])     \
          abort ();                                             \
    }                                                           \
  while (0)
258
/* Compare the four elements of S64_ta with the four elements designated
   by RES and call abort on the first mismatch.  The loop index is the
   variable `i' of the invoking scope, which must be declared by the
   caller.  RES is parenthesized so that a pointer expression such as
   `table + 1' is subscripted as a whole.  */
#define NCHECK(RES)                     \
  do                                    \
    {                                   \
      for (i = 0; i < 4; i++)           \
        if (S64_ta[i] != (RES)[i])      \
          abort ();                     \
    }                                   \
  while (0)
267
/* Signed and unsigned front ends to CHECK: each forwards T, N and AS
   unchanged and supplies the signedness letter as the fourth argument.  */
#define SCHECK(T, N, AS) CHECK (T, N, AS, S)
#define UCHECK(T, N, AS) CHECK (T, N, AS, U)
270
271 int
272 main ()
273 {
274 int i;
275
276 test_addl_S64_S32_4 (S64_ta, S32_tb, S32_tc);
277 SCHECK (64, 4, addl);
278 test_addl_S32_S16_8 (S32_ta, S16_tb, S16_tc);
279 SCHECK (32, 8, addl);
280 test_addl_S16_S8_16 (S16_ta, S8_tb, S8_tc);
281 SCHECK (16, 16, addl);
282 test_subl_S64_S32_4 (S64_ta, S32_tb, S32_tc);
283 SCHECK (64, 4, subl);
284 test_subl_S32_S16_8 (S32_ta, S16_tb, S16_tc);
285 SCHECK (32, 8, subl);
286 test_subl_S16_S8_16 (S16_ta, S8_tb, S8_tc);
287 SCHECK (16, 16, subl);
288
289 test_addl_U64_U32_4 (S64_ta, S32_tb, S32_tc);
290 UCHECK (64, 4, addl);
291 test_addl_U32_U16_8 (S32_ta, S16_tb, S16_tc);
292 UCHECK (32, 8, addl);
293 test_addl_U16_U8_16 (S16_ta, S8_tb, S8_tc);
294 UCHECK (16, 16, addl);
295 test_subl_U64_U32_4 (S64_ta, S32_tb, S32_tc);
296 UCHECK (64, 4, subl);
297 test_subl_U32_U16_8 (S32_ta, S16_tb, S16_tc);
298 UCHECK (32, 8, subl);
299 test_subl_U16_U8_16 (S16_ta, S8_tb, S8_tc);
300 UCHECK (16, 16, subl);
301
302 test_addl_S64_S32_4_neg0 (S64_ta, S32_tb, S32_tc);
303 NCHECK (neg_r);
304 test_addl_S64_S32_4_neg1 (S64_ta, S32_tb, S32_tc);
305 NCHECK (subl_rS64);
306 test_subl_S64_S32_4_neg0 (S64_ta, S32_tb, S32_tc);
307 NCHECK (addl_rS64);
308 test_subl_S64_S32_4_neg1 (S64_ta, S32_tb, S32_tc);
309 NCHECK (neg_r);
310 test_subl_S64_S32_4_neg2 (S64_ta, S32_tb, S32_tc);
311 NCHECK (neg_r);
312
313 return 0;
314 }
315
316