/* { dg-do run { target avx512fp16 } } */
/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */

#define AVX512FP16
#include "avx512fp16-helper.h"

#define N_ELEMS (AVX512F_LEN / 16)
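
/* Emulate FNMSUB on packed FP16: each selected element becomes
   -(op1 * op2) - dest.  The halves are unpacked to float, combined,
   and packed back to FP16; elements masked off in K are zeroed when
   zero_mask is set, otherwise they keep the old DEST value.  */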
void NOINLINE
EMULATE(fnmsub_ph) (V512 * dest, V512 op1, V512 op2,
                    __mmask32 k, int zero_mask)
{
  V512 v1, v2, v3, v4, v5, v6, v7, v8;
  int i;
  __mmask16 m1, m2;

  m1 = k & 0xffff;
  m2 = (k >> 16) & 0xffff;

  unpack_ph_2twops(op1, &v1, &v2);
  unpack_ph_2twops(op2, &v3, &v4);
  unpack_ph_2twops(*dest, &v7, &v8);

  for (i = 0; i < 16; i++) {
    if (((1 << i) & m1) == 0) {
      if (zero_mask) {
        v5.f32[i] = 0;
      }
      else {
        v5.u32[i] = v7.u32[i];
      }
    }
    else {
      v5.f32[i] = -(v1.f32[i] * v3.f32[i]) - v7.f32[i];
    }

    if (((1 << i) & m2) == 0) {
      if (zero_mask) {
        v6.f32[i] = 0;
      }
      else {
        v6.u32[i] = v8.u32[i];
      }
    }
    else {
      v6.f32[i] = -(v2.f32[i] * v4.f32[i]) - v8.f32[i];
    }
  }
  *dest = pack_twops_2ph(v5, v6);
}
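
/* Same operation, but with the operand order of the _mask_ intrinsic,
   where DEST is the first multiplicand: each selected element becomes
   -(dest * op1) - op2.  */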
void NOINLINE
EMULATE(m_fnmsub_ph) (V512 * dest, V512 op1, V512 op2,
                      __mmask32 k, int zero_mask)
{
  V512 v1, v2, v3, v4, v5, v6, v7, v8;
  int i;
  __mmask16 m1, m2;

  m1 = k & 0xffff;
  m2 = (k >> 16) & 0xffff;

  unpack_ph_2twops(op1, &v1, &v2);
  unpack_ph_2twops(op2, &v3, &v4);
  unpack_ph_2twops(*dest, &v7, &v8);

  for (i = 0; i < 16; i++) {
    if (((1 << i) & m1) == 0) {
      if (zero_mask) {
        v5.f32[i] = 0;
      }
      else {
        v5.u32[i] = v7.u32[i];
      }
    }
    else {
      v5.f32[i] = -(v1.f32[i] * v7.f32[i]) - v3.f32[i];
    }

    if (((1 << i) & m2) == 0) {
      if (zero_mask) {
        v6.f32[i] = 0;
      }
      else {
        v6.u32[i] = v8.u32[i];
      }
    }
    else {
      v6.f32[i] = -(v2.f32[i] * v8.f32[i]) - v4.f32[i];
    }
  }
  *dest = pack_twops_2ph(v5, v6);
}
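
/* Check each intrinsic form against its emulation: plain, merge-masked
   (_mask_ and _mask3_) and zero-masked, plus the explicit-rounding
   variants for 512-bit vectors.  */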
void
TEST (void)
{
  V512 res;
  V512 exp;

  init_src();

  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, NET_MASK, 0);
  HF(res) = INTRINSIC (_fnmsub_ph) (HF(src1), HF(src2),
                                    HF(res));
  CHECK_RESULT (&res, &exp, N_ELEMS, _fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(m_fnmsub_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask_fnmsub_ph) (HF(res), MASK_VALUE,
                                         HF(src1), HF(src2));
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask3_fnmsub_ph) (HF(src1), HF(src2),
                                          HF(res), MASK_VALUE);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, ZMASK_VALUE, 1);
  HF(res) = INTRINSIC (_maskz_fnmsub_ph) (ZMASK_VALUE, HF(src1),
                                          HF(src2), HF(res));
  CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fnmsub_ph);

#if AVX512F_LEN == 512
  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, NET_MASK, 0);
  HF(res) = INTRINSIC (_fnmsub_round_ph) (HF(src1), HF(src2),
                                          HF(res), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(m_fnmsub_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask_fnmsub_round_ph) (HF(res), MASK_VALUE,
                                               HF(src1), HF(src2), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, MASK_VALUE, 0);
  HF(res) = INTRINSIC (_mask3_fnmsub_round_ph) (HF(src1), HF(src2),
                                                HF(res), MASK_VALUE, _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fnmsub_ph);

  init_dest(&res, &exp);
  EMULATE(fnmsub_ph)(&exp, src1, src2, ZMASK_VALUE, 1);
  HF(res) = INTRINSIC (_maskz_fnmsub_round_ph) (ZMASK_VALUE, HF(src1),
                                                HF(src2), HF(res), _ROUND_NINT);
  CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fnmsub_ph);
#endif

  if (n_errs != 0) {
    abort ();
  }
}