(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
avx512fp16-vreducesh-1b.c
       1  /* { dg-do run { target avx512fp16 } } */
       2  /* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
       3  
       4  
       5  #define AVX512FP16
       6  #include "avx512fp16-helper.h"
       7  
       8  #define N_ELEMS 8
       9  
      10  V512 borrow_reduce_ps(V512 v, int imm8)
      11  {
      12    V512 temp;
      13    switch (imm8)
      14      {
      15      case 1: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 1);break;
      16      case 2: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 2);break;
      17      case 3: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 3);break;
      18      case 4: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 4);break;
      19      case 5: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 5);break;
      20      case 6: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 6);break;
      21      case 7: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 7);break;
      22      case 8: temp.zmm =  _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 8);break;
      23      }
      24    return temp;
      25  }
      26  
      27  void NOINLINE
      28  emulate_reduce_sh(V512 * dest, V512 op1,
      29                    __mmask32 k, int imm8, int zero_mask) 
      30  {
      31    V512 v1, v2, v3, v4, v5, v6, v7, v8;
      32    V512 t1;
      33    int i;
      34  
      35    unpack_ph_2twops(op1, &v1, &v2);
      36    unpack_ph_2twops(*dest, &v7, &v8);
      37    t1 = borrow_reduce_ps(v1, imm8);
      38  
      39    if ((k&1) || !k)
      40      v5.f32[0] = t1.f32[0];
      41    else if (zero_mask)
      42      v5.f32[0] = 0;
      43    else
      44      v5.f32[0] = v7.f32[0];
      45  
      46    for (i = 1; i < 8; i++)
      47      v5.f32[i] = v1.f32[i];
      48  
      49    *dest = pack_twops_2ph(v5, v6);
      50  }
      51  
      52  void
      53  test_512 (void)
      54  {
      55    V512 res;
      56    V512 exp;
      57  
      58    init_src();
      59  
      60    emulate_reduce_sh(&exp, src1,  0x1, 8, 0);
      61    res.xmmh[0] = _mm_reduce_round_sh(src1.xmmh[0], exp.xmmh[0], 8, _ROUND_CUR);
      62    check_results(&res, &exp, N_ELEMS, "_mm_reduce_round_sh");
      63  
      64    init_dest(&res, &exp);
      65    emulate_reduce_sh(&exp, src1,  0x1, 7, 0);
      66    res.xmmh[0] = _mm_mask_reduce_round_sh(res.xmmh[0], 0x1, src1.xmmh[0], exp.xmmh[0], 7, _ROUND_CUR);
      67    check_results(&res, &exp, N_ELEMS, "_mm_mask_reduce_round_sh");
      68  
      69    emulate_reduce_sh(&exp, src1,  0x3, 6, 1);
      70    res.xmmh[0] = _mm_maskz_reduce_round_sh(0x3, src1.xmmh[0], exp.xmmh[0], 6, _ROUND_CUR);
      71    check_results(&res, &exp, N_ELEMS, "_mm_maskz_reduce_round_sh");
      72  
      73  
      74    if (n_errs != 0) {
      75        abort ();
      76    }
      77  }
      78