1 /* { dg-do assemble { target { arm*-*-* } } } */
2 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
3 /* { dg-add-options arm_v8_2a_bf16_neon } */
4 /* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+fp16" } */
5
6 #include "arm_neon.h"
7
8 float32x2_t
9 test_vbfdot_vcreate (float32x2_t r, uint64_t a, uint64_t b)
10 {
11 bfloat16x4_t _a = vcreate_bf16(a);
12 bfloat16x4_t _b = vcreate_bf16(b);
13
14 return vbfdot_f32 (r, _a, _b);
15 }
16 /* { dg-final { scan-assembler {vdot.bf16\td[0-9]+, d[0-9]+, d[0-9]+} } } */
17
18 bfloat16x8_t test_vcombine_bf16 (bfloat16x4_t a, bfloat16x4_t b)
19 {
20 return vcombine_bf16 (a, b);
21 }
22
23 bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
24 {
25 return vget_high_bf16 (a);
26 }
27
28 bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
29 {
30 return vget_low_bf16 (a);
31 }
32
33 bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a)
34 {
35 return vget_lane_bf16 (a, 1);
36 }
37
38 bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a)
39 {
40 return vgetq_lane_bf16 (a, 7);
41 }
42
43 bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b)
44 {
45 return vset_lane_bf16 (a, b, 1);
46 }
47
48 bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b)
49 {
50 return vsetq_lane_bf16 (a, b, 7);
51 }
52
53 bfloat16x4_t vdup_test (bfloat16_t a)
54 {
55 return vdup_n_bf16 (a);
56 }
57 /* { dg-final { scan-assembler {vdup\.16\td[0-9]+, r[0-9]+} } } */
58
59 bfloat16x8_t vdupq_test (bfloat16_t a)
60 {
61 return vdupq_n_bf16 (a);
62 }
63 /* { dg-final { scan-assembler {vdup\.16\tq[0-9]+, r[0-9]+} } } */
64
65
66 bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a)
67 {
68 return vdup_lane_bf16 (a, 1);
69 }
70 /* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */
71
72 bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a)
73 {
74 return vdupq_lane_bf16 (a, 1);
75 }
76 /* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */
77
78 bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a)
79 {
80 return vdup_laneq_bf16 (a, 3);
81 }
82
83 bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a)
84 {
85 return vdupq_laneq_bf16 (a, 3);
86 }
87
88 bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t a)
89 {
90 return vduph_lane_bf16 (a, 1);
91 }
92
93 bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a)
94 {
95 return vduph_laneq_bf16 (a, 7);
96 }