1 /* { dg-do compile } */
2 /* { dg-options "-O3 -fno-vect-cost-model" } */
3
4 /* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
5 types. */
6
7 /* Enable ARMv8.2-A+fp16 so we have access to the vector instructions. */
8 #pragma GCC target ("arch=armv8.2-a+fp16")
9
10 _Float16
11 sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
12 _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
13 _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
14 {
15 for (int i = 0; i < 256; i++)
16 a[i] = b[i] + c[i];
17 }
18
19 _Float16
20 sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
21 __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
22 __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
23 {
24 for (int i = 0; i < 256; i++)
25 a[i] = b[i] + c[i];
26 }
27
28 /* Two FADD operations on "8h" data widths, one from sum_Float16, one from
29 sum_fp16. */
30 /* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */