1  /* { dg-do run { target avx512fp16 } } */
       2  /* { dg-options "-Ofast -mavx512fp16 -mavx512vl -mavx512dq" } */
       3  
       4  #define AVX512FP16
       5  #include "avx512fp16-helper.h"
       6  
       7  void NOINLINE
       8  emulate_absneg_ph (V512 * dest, V512 op1, int abs)
       9  {
      10    V512 v1, v2, v3, v4;
      11    int i;
      12  
      13    unpack_ph_2twops(op1, &v1, &v2);
      14    unpack_ph_2twops(*dest, &v3, &v4);
      15  
      16    for (i = 0; i != 16; i++) {
      17      if (abs) {
      18        v3.f32[i] = __builtin_fabsf (v1.f32[i]);
      19        v4.f32[i] = __builtin_fabsf (v2.f32[i]);
      20      }
      21      else {
      22        v3.f32[i] = -v1.f32[i];
      23        v4.f32[i] = -v2.f32[i];
      24      }
      25    }
      26    *dest = pack_twops_2ph(v3, v4);
      27  }
      28  
      29  void NOINLINE
      30  emulate_copysign_ph (V512 * dest, V512 op1, V512 op2, int xorsign)
      31  {
      32    V512 v1, v2, v3, v4, v5, v6;
      33    int i;
      34  
      35    unpack_ph_2twops(op1, &v1, &v2);
      36    unpack_ph_2twops(op2, &v3, &v4);
      37    unpack_ph_2twops(*dest, &v5, &v6);
      38  
      39    for (i = 0; i != 16; i++) {
      40      if (xorsign) {
      41        v5.f32[i] = v1.f32[i] * __builtin_copysignf (1, v3.f32[i]);
      42        v6.f32[i] = v2.f32[i] * __builtin_copysignf (1, v4.f32[i]);
      43      }
      44      else {
      45        v5.f32[i] = __builtin_copysignf (v1.f32[i], v3.f32[i]);
      46        v6.f32[i] = __builtin_copysignf (v2.f32[i], v4.f32[i]);
      47      }
      48    }
      49    *dest = pack_twops_2ph(v5, v6);
      50  }
      51  
      52  
/* Test driver: checks _Float16 abs, neg, copysign and xorsign loops against
   the single-precision emulation routines above.

   For each operation the expected vector EXP is computed once by the
   emulate_* routine, then the same operation is written as three plain
   scalar loops over RES with trip counts 8, 16 and 32.  Per the check
   labels these correspond to the m128h, m256h and m512h vector sizes;
   presumably the loops are meant to be vectorized by the compiler at each
   width (the file is built with -Ofast) -- keep their shape unchanged.

   Each check_results call is passed the trip count of the loop just run, so
   presumably only the elements written by that loop are compared; RES
   elements beyond that count hold stale or uninitialized data.

   src1 and src2 are defined outside this file (presumably filled in by
   init_src), as is the error counter n_errs (presumably bumped by
   check_results on mismatch).  Aborts if any check failed.  */
void
test_512 (void)
{
  V512 res, exp;

  init_src ();

  /* Abs for vector float16.  */
  emulate_absneg_ph (&exp, src1, 1);
  for (int i = 0; i != 8; i++)
    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
  check_results (&res, &exp, 8, "abs_m128h");

  for (int i = 0; i != 16; i++)
    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
  check_results (&res, &exp, 16, "abs_m256h");

  for (int i = 0; i != 32; i++)
    res.f16[i] = __builtin_fabsf16 (src1.f16[i]);
  check_results (&res, &exp, 32, "abs_m512h");

  /* Neg for vector float16.  */
  emulate_absneg_ph (&exp, src1, 0);
  for (int i = 0; i != 8; i++)
    res.f16[i] = -(src1.f16[i]);
  check_results (&res, &exp, 8, "neg_m128h");

  for (int i = 0; i != 16; i++)
    res.f16[i] = -(src1.f16[i]);
  check_results (&res, &exp, 16, "neg_m256h");

  for (int i = 0; i != 32; i++)
    res.f16[i] = -(src1.f16[i]);
  check_results (&res, &exp, 32, "neg_m512h");

  /* Copysign for vector float16: magnitude from src1, sign from src2.  */
  emulate_copysign_ph (&exp, src1, src2, 0);
  for (int i = 0; i != 8; i++)
    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
  check_results (&res, &exp, 8, "copysign_m128h");

  for (int i = 0; i != 16; i++)
    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
  check_results (&res, &exp, 16, "copysign_m256h");

  for (int i = 0; i != 32; i++)
    res.f16[i] = __builtin_copysignf16 (src1.f16[i], src2.f16[i]);
  check_results (&res, &exp, 32, "copysign_m512h");

  /* Xorsign for vector float16: src1 multiplied by +/-1 taken from the
     sign of src2.  */
  emulate_copysign_ph (&exp, src1, src2, 1);
  for (int i = 0; i != 8; i++)
    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
  check_results (&res, &exp, 8, "xorsign_m128h");

  for (int i = 0; i != 16; i++)
    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
  check_results (&res, &exp, 16, "xorsign_m256h");

  for (int i = 0; i != 32; i++)
    res.f16[i] = src1.f16[i] * __builtin_copysignf16 (1, src2.f16[i]);
  check_results (&res, &exp, 32, "xorsign_m512h");

  /* Fail the test if any of the comparisons above recorded an error.  */
  if (n_errs != 0) {
    abort ();
  }
}