(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
avx512fp16-vfmaddsubXXXph-1b.c
       1  /* { dg-do run { target avx512fp16 } } */
       2  /* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
       3  
       4  
       5  #define AVX512FP16 /* Defined ahead of the helper include; presumably selects its FP16 paths -- confirm in avx512fp16-helper.h. */
       6  #include "avx512fp16-helper.h"
       7  /* Count handed to CHECK_RESULT in TEST; AVX512F_LEN comes from outside this file (presumably the vector width in bits, 16 bits per half). */
       8  #define N_ELEMS (AVX512F_LEN / 16)
       9  
      10  void NOINLINE /* Scalar reference model for fmaddsub on halves: *dest = op1 * op2 -/+ *dest per element, under mask k. */
      11  EMULATE(fmaddsub_ph) (V512 * dest, V512 op1, V512 op2, /* dest: addend on entry, result on exit; op1, op2: the two factors. */
      12                      __mmask32 k, int zero_mask) /* k: bit set = compute that element; zero_mask: nonzero zeroes unselected elements, 0 keeps the old *dest value. */
      13  {
      14      V512 v1, v2, v3, v4, v5, v6, v7, v8;
      15      int i;
      16      __mmask16 m1, m2;
      17      /* Split k: m1 = bits 0..15 (tested for v5), m2 = bits 16..31 (tested for v6). */
      18      m1 = k & 0xffff;
      19      m2 = (k >> 16) & 0xffff;
      20      /* Helper is defined elsewhere; presumably widens the halves of one V512 into two float vectors -- confirm. */
      21      unpack_ph_2twops(op1, &v1, &v2);
      22      unpack_ph_2twops(op2, &v3, &v4);
      23      unpack_ph_2twops(*dest, &v7, &v8);
      24      /* Each iteration writes element i of both v5 and v6 (through f32 or u32). */
      25      for (i = 0; i < 16; i++) {
      26          if (((1 << i) & m1) == 0) { /* element i not selected by m1 */
      27              if (zero_mask) {
      28                  v5.f32[i] = 0;
      29              }
      30              else {
      31                  v5.u32[i] = v7.u32[i]; /* keep the old *dest element, copied through the u32 view */
      32              }
      33          }
      34          else {
      35              if (i % 2 == 1) { /* odd index: multiply-add */
      36                  v5.f32[i] = v1.f32[i] * v3.f32[i] + v7.f32[i];
      37              }
      38              else { /* even index: multiply-subtract */
      39                  v5.f32[i] = v1.f32[i] * v3.f32[i] - v7.f32[i];
      40              }
      41          }
      42          /* Same rule for the second vector. NOTE(review): assumes v6 element i is half 16+i, so the parity of i is still the element parity -- confirm. */
      43          if (((1 << i) & m2) == 0) {
      44              if (zero_mask) {
      45                  v6.f32[i] = 0;
      46              }
      47              else {
      48                  v6.u32[i] = v8.u32[i];
      49              }
      50          }
      51          else {
      52              if (i % 2 == 1) {
      53                  v6.f32[i] = v2.f32[i] * v4.f32[i] + v8.f32[i];
      54              }
      55              else {
      56                  v6.f32[i] = v2.f32[i] * v4.f32[i] - v8.f32[i];
      57              }
      58          }
      59      }
      60      *dest = pack_twops_2ph(v5, v6); /* presumably narrows both float vectors back into one vector of halves -- confirm. */
      61  }
      62  
      63  void NOINLINE /* Scalar reference model with *dest as a factor: *dest = op1 * *dest -/+ op2 per element, under mask k. */
      64  EMULATE(m_fmaddsub_ph) (V512 * dest, V512 op1, V512 op2, /* dest: second factor on entry, result on exit; op1: first factor; op2: addend. */
      65                      __mmask32 k, int zero_mask) /* k: bit set = compute that element; zero_mask: nonzero zeroes unselected elements, 0 keeps the old *dest value. */
      66  {
      67      V512 v1, v2, v3, v4, v5, v6, v7, v8;
      68      int i;
      69      __mmask16 m1, m2;
      70      /* Split k: m1 = bits 0..15 (tested for v5), m2 = bits 16..31 (tested for v6). */
      71      m1 = k & 0xffff;
      72      m2 = (k >> 16) & 0xffff;
      73      /* Helper is defined elsewhere; presumably widens the halves of one V512 into two float vectors -- confirm. */
      74      unpack_ph_2twops(op1, &v1, &v2);
      75      unpack_ph_2twops(op2, &v3, &v4);
      76      unpack_ph_2twops(*dest, &v7, &v8);
      77      /* Each iteration writes element i of both v5 and v6 (through f32 or u32). */
      78      for (i = 0; i < 16; i++) {
      79          if (((1 << i) & m1) == 0) { /* element i not selected by m1 */
      80              if (zero_mask) {
      81                  v5.f32[i] = 0;
      82              }
      83              else {
      84                  v5.u32[i] = v7.u32[i]; /* keep the old *dest element, copied through the u32 view */
      85              }
      86          }
      87          else {
      88              if (i % 2 == 1) { /* odd index: op1 * dest + op2 */
      89                  v5.f32[i] = v1.f32[i] * v7.f32[i] + v3.f32[i];
      90              }
      91              else { /* even index: op1 * dest - op2 */
      92                  v5.f32[i] = v1.f32[i] * v7.f32[i] - v3.f32[i];
      93              }
      94          }
      95          /* Same rule for the second vector. NOTE(review): assumes v6 element i is half 16+i, so the parity of i is still the element parity -- confirm. */
      96          if (((1 << i) & m2) == 0) {
      97              if (zero_mask) {
      98                  v6.f32[i] = 0;
      99              }
     100              else {
     101                  v6.u32[i] = v8.u32[i];
     102              }
     103          }
     104          else {
     105              if (i % 2 == 1) {
     106                  v6.f32[i] = v2.f32[i] * v8.f32[i] + v4.f32[i];
     107              }
     108              else {
     109                  v6.f32[i] = v2.f32[i] * v8.f32[i] - v4.f32[i];
     110              }
     111          }
     112      }
     113      *dest = pack_twops_2ph(v5, v6); /* presumably narrows both float vectors back into one vector of halves -- confirm. */
     114  }
     115  
     116  void /* Test body: runs each fmaddsub_ph intrinsic variant next to its scalar model and aborts if n_errs is nonzero at the end. */
     117  TEST (void)
     118  {
     119    V512 res;
     120    V512 exp;
     121    /* res receives the intrinsic result, exp the model result. */
     122    init_src(); /* defined outside this file; presumably fills src1/src2 -- confirm. */
     123    /* Each case: init_dest, model into exp, intrinsic into res, then CHECK_RESULT (res, exp, N_ELEMS, name). */
     124    init_dest(&res, &exp);
     125    EMULATE(fmaddsub_ph)(&exp, src1, src2,  NET_MASK, 0); /* unmasked intrinsic; NET_MASK presumably selects every element -- confirm. */
     126    HF(res) = INTRINSIC (_fmaddsub_ph) (HF(src1), HF(src2), HF(res));
     127    CHECK_RESULT (&res, &exp, N_ELEMS, _fmaddsub_ph);
     128    init_dest(&res, &exp);
     129    EMULATE(fmaddsub_ph)(&exp, src1, src2,  MASK_VALUE, 0); /* mask3 form: res is the last vector operand, so the plain model applies. */
     130    HF(res) = INTRINSIC (_mask3_fmaddsub_ph) (HF(src1), HF(src2),
     131  				      HF(res), MASK_VALUE);
     132    CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fmaddsub_ph);
     133    init_dest(&res, &exp);
     134    EMULATE(m_fmaddsub_ph)(&exp, src1, src2,  MASK_VALUE, 0); /* mask form: res is the first operand, hence the m_ model (op1 * dest -/+ op2). */
     135    HF(res) = INTRINSIC (_mask_fmaddsub_ph) (HF(res), MASK_VALUE,
     136  				     HF(src1), HF(src2));
     137    CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fmaddsub_ph);
     138    init_dest(&res, &exp);
     139    EMULATE(fmaddsub_ph)(&exp, src1, src2,  ZMASK_VALUE, 1); /* maskz form: zero_mask = 1 zeroes unselected elements in the model. */
     140    HF(res) = INTRINSIC (_maskz_fmaddsub_ph) (ZMASK_VALUE, HF(src1),
     141  				      HF(src2), HF(res));
     142    CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fmaddsub_ph);
     143    /* The explicit-rounding variants below are compiled only when AVX512F_LEN == 512. */
     144    init_dest(&res, &exp); /* NOTE: sits outside the #if, so it runs for every AVX512F_LEN. */
     145  #if AVX512F_LEN == 512
     146    EMULATE(fmaddsub_ph)(&exp, src1, src2,  NET_MASK, 0);
     147    HF(res) = INTRINSIC (_fmaddsub_round_ph) (HF(src1), HF(src2),
     148  				      HF(res), _ROUND_NINT);
     149    CHECK_RESULT (&res, &exp, N_ELEMS, _fmaddsub_ph); /* NOTE(review): the round checks pass the non-round names as the last argument -- presumably only a label; confirm. */
     150    init_dest(&res, &exp);
     151    EMULATE(fmaddsub_ph)(&exp, src1, src2,  MASK_VALUE, 0);
     152    HF(res) = INTRINSIC (_mask3_fmaddsub_round_ph) (HF(src1), HF(src2),
     153  					    HF(res), MASK_VALUE, _ROUND_NINT);
     154    CHECK_RESULT (&res, &exp, N_ELEMS, _mask3_fmaddsub_ph);
     155    init_dest(&res, &exp);
     156    EMULATE(m_fmaddsub_ph)(&exp, src1, src2,  MASK_VALUE, 0); /* res is again the first intrinsic operand, so the m_ model is used. */
     157    HF(res) = INTRINSIC (_mask_fmaddsub_round_ph) (HF(res), MASK_VALUE,
     158  					   HF(src1), HF(src2), _ROUND_NINT);
     159    CHECK_RESULT (&res, &exp, N_ELEMS, _mask_fmaddsub_ph);
     160    init_dest(&res, &exp);
     161    EMULATE(fmaddsub_ph)(&exp, src1, src2,  ZMASK_VALUE, 1);
     162    HF(res) = INTRINSIC (_maskz_fmaddsub_round_ph) (ZMASK_VALUE, HF(src1),
     163  					    HF(src2), HF(res), _ROUND_NINT);
     164    CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_fmaddsub_ph);
     165  #endif
     166    /* n_errs is declared outside this file; presumably CHECK_RESULT increments it on a mismatch -- confirm. */
     167    if (n_errs != 0) {
     168        abort ();
     169    }
     170  }
     171