(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
amxfp16-dpfp16ps-2.c
       1  /* { dg-do run { target { ! ia32 } } } */
       2  /* { dg-require-effective-target amx_fp16 } */
       3  /* { dg-require-effective-target avx512fp16 } */
       4  /* { dg-options "-O2 -mamx-fp16 -mavx512fp16" } */
       5  #define AMX_FP16
       6  #define DO_TEST test_amx_fp16_dpfp16ps
       7  void test_amx_fp16_dpfp16ps ();
       8  #include "amx-helper.h"
       9  
      10  void calc_matrix_dpfp16ps (__tile *dst, __tile *src1, __tile *src2)
      11  {
      12    uint16_t *src1_buf = (uint16_t *)src1->buf;
      13    uint16_t *src2_buf = (uint16_t *)src2->buf;
      14    float *dst_buf = (float *)dst->buf;
      15    
      16    int M = src1->rows;
      17    int N = src1->colsb / 4;
      18    int K = src2->colsb / 4;
      19    int i, j, k, t;
      20  
      21    for (i = 0; i < M; i++)
      22      for (j = 0; j < N; j++)
      23        for (k = 0; k < K; k++)
      24  	for (t = 0; t < 2; t+=2)
      25  	  {    
      26  	    dst_buf[i * K + k] += 
      27  	      (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
      28  	      make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) +
      29  	      (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
      30  	      make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
      31  	  }
      32  
      33  }
      34  
      35  void test_amx_fp16_dpfp16ps ()
      36  {
      37    __tilecfg_u cfg;
      38    __tile dst, dst_ref, src1, src2;
      39    uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
      40  
      41    init_fp16_max_tile_buffer (tmp_dst_buf);
      42    init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
      43  
      44    init_tile_config (&cfg);
      45    init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
      46    init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
      47    init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
      48  
      49    calc_matrix_dpfp16ps (&dst, &src1, &src2);
      50    
      51    _tile_dpfp16ps (1, 2, 3);
      52    _tile_stored (1, dst_ref.buf, _STRIDE);
      53  
      54    if (!check_float_tile_register (&dst_ref, &dst))
      55      abort ();
      56  }