/* { dg-do run { target avx512fp16 } } */
/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
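/* Exercise the FNMADD (-(a * b) + c) _Float16 intrinsics in their
   unmasked, merge-masked, mask3, zero-masked and, for 512-bit vectors,
   explicit rounding-control forms, comparing each result against a
   float32 software emulation.  */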

#define AVX512FP16
#include "avx512fp16-helper.h"

#define N_ELEMS (AVX512F_LEN / 16)

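/* Emulate FNMADD on 16-bit halves using float32 arithmetic: for every
   element selected by K, compute -(op1[i] * op2[i]) + dest[i]; elements
   not selected are either zeroed or keep the old dest value, depending
   on zero_mask.  */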
void NOINLINE
EMULATE(fnmadd_ph) (V512 * dest, V512 op1, V512 op2,
                    __mmask32 k, int zero_mask)
{
  V512 v1, v2, v3, v4, v5, v6, v7, v8;
  int i;
  __mmask16 m1, m2;

  m1 = k & 0xffff;
  m2 = (k >> 16) & 0xffff;

  unpack_ph_2twops(op1, &v1, &v2);
  unpack_ph_2twops(op2, &v3, &v4);
  unpack_ph_2twops(*dest, &v7, &v8);

  for (i = 0; i < 16; i++) {
    if (((1 << i) & m1) == 0) {
      if (zero_mask) {
        v5.f32[i] = 0;
      }
      else {
        v5.u32[i] = v7.u32[i];
      }
    }
    else {
      v5.f32[i] = -(v1.f32[i] * v3.f32[i]) + v7.f32[i];
    }

    if (((1 << i) & m2) == 0) {
      if (zero_mask) {
        v6.f32[i] = 0;
      }
      else {
        v6.u32[i] = v8.u32[i];
      }
    }
    else {
      v6.f32[i] = -(v2.f32[i] * v4.f32[i]) + v8.f32[i];
    }
  }
  *dest = pack_twops_2ph(v5, v6);
}

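/* Emulate the merge-mask form, where dest doubles as the first
   multiplicand: selected elements compute -(dest[i] * op1[i]) + op2[i];
   unselected elements are zeroed or keep dest, as above.  */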
void NOINLINE
EMULATE(m_fnmadd_ph) (V512 * dest, V512 op1, V512 op2,
                      __mmask32 k, int zero_mask)
{
  V512 v1, v2, v3, v4, v5, v6, v7, v8;
  int i;
  __mmask16 m1, m2;

  m1 = k & 0xffff;
  m2 = (k >> 16) & 0xffff;

  unpack_ph_2twops(op1, &v1, &v2);
  unpack_ph_2twops(op2, &v3, &v4);
  unpack_ph_2twops(*dest, &v7, &v8);

  for (i = 0; i < 16; i++) {
    if (((1 << i) & m1) == 0) {
      if (zero_mask) {
        v5.f32[i] = 0;
      }
      else {
        v5.u32[i] = v7.u32[i];
      }
    }
    else {
      v5.f32[i] = -(v1.f32[i] * v7.f32[i]) + v3.f32[i];
    }

    if (((1 << i) & m2) == 0) {
      if (zero_mask) {
        v6.f32[i] = 0;
      }
      else {
        v6.u32[i] = v8.u32[i];
      }
    }
    else {
      v6.f32[i] = -(v2.f32[i] * v8.f32[i]) + v4.f32[i];
    }
  }
  *dest = pack_twops_2ph(v5, v6);
}

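/* Drive each intrinsic form against its emulated reference, then
   compare element-wise with CHECK_RESULT.  */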
void
TEST (void)
{
  V512 res;
  V512 exp;

  init_src();

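  /* Unmasked form.  */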
  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, NET_MASK, 0);
  HF(res) = INTRINSIC (_fnmadd_ph) (HF(src1), HF(src2),
                                    HF(res));
  CHECK_RESULT (&res, &exp, N_ELEMS, _fnmadd_ph);

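  /* Merge-mask form: unselected elements keep the first operand (res).  */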
  init_dest(&res, &exp);
  EMULATE(m_fnmadd_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask_fnmadd_ph) (HF(res), MASK_VALUE,
                                         HF(src1), HF(src2));
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fnmadd_ph);

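  /* mask3 form: unselected elements keep the addend (res).  */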
  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask3_fnmadd_ph) (HF(src1), HF(src2),
                                          HF(res), MASK_VALUE);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fnmadd_ph);

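  /* Zero-mask form: unselected elements are zeroed.  */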
  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, ZMASK_VALUE, 1);
  HF(res) = INTRINSIC (_maskz_fnmadd_ph) (ZMASK_VALUE, HF(src1),
                                          HF(src2), HF(res));
  CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fnmadd_ph);

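  /* The _round forms, which take an explicit rounding-mode operand, are
     only available for 512-bit vectors.  */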
#if AVX512F_LEN == 512
  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, NET_MASK, 0);
  HF(res) = INTRINSIC (_fnmadd_round_ph) (HF(src1), HF(src2),
                                          HF(res), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _fnmadd_ph);

  init_dest(&res, &exp);
  EMULATE(m_fnmadd_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask_fnmadd_round_ph) (HF(res), MASK_VALUE,
                                               HF(src1), HF(src2), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fnmadd_ph);

  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask3_fnmadd_round_ph) (HF(src1), HF(src2),
                                                HF(res), MASK_VALUE, _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fnmadd_ph);

  init_dest(&res, &exp);
  EMULATE(fnmadd_ph)(&exp, src1, src2, ZMASK_VALUE, 1);
  HF(res) = INTRINSIC (_maskz_fnmadd_round_ph) (ZMASK_VALUE, HF(src1),
                                                HF(src2), HF(res), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fnmadd_ph);
#endif

  if (n_errs != 0) {
    abort ();
  }
}