1 /* { dg-do run { target { ! ia32 } } } */
2 /* { dg-require-effective-target amx_int8 } */
3 /* { dg-options "-O2 -mamx-int8" } */
4 #include <immintrin.h>
5
6 #define AMX_INT8
7 #define DO_TEST test_amx_int8_dpbuud
8 void test_amx_int8_dpbuud ();
9 #include "amx-check.h"
10
/* Fill BUF, viewed as a 16x16 array of int, so that the element at
   (row, col) holds 2 * row - (16 - col).  Exactly 256 ints are
   written.  */
void init_i32_max_tile_buffer (uint8_t *buf)
{
  int *cell = (int *) buf;
  int row, col;

  /* CELL walks the buffer linearly, which matches row * 16 + col.  */
  for (row = 0; row < 16; ++row)
    for (col = 0; col < 16; ++col, ++cell)
      *cell = 2 * row + col - 16;
}
20
21 void calc_matrix_dpbuud (__tile *dst, __tile *src1, __tile *src2)
22 {
23 uint8_t *src1_buf = (uint8_t *)src1->buf;
24 uint8_t *src2_buf = (uint8_t *)src2->buf;
25 int *dst_buf = (int *)dst->buf;
26
27 int M = src1->rows;
28 int N = src1->colsb / 4;
29 int K = src2->colsb / 4;
30 int i, j, k, t;
31
32 for (i = 0; i < M; i++)
33 for (j = 0; j < N; j++)
34 for (k = 0; k < K; k++)
35 for (t = 0; t < 4; t++)
36 {
37 dst_buf[i * N + k] +=
38 ((unsigned) src1_buf[i * 4 * N + 4 * j + t]) *
39 ((unsigned) src2_buf[j * 4 * K + 4 * k + t]);
40 }
41 }
42
43 void test_amx_int8_dpbuud ()
44 {
45 __tilecfg_u cfg;
46 __tile dst, dst_ref, src1, src2;
47 uint8_t tmp_dst_buf[1024];
48
49 init_i32_max_tile_buffer (tmp_dst_buf);
50
51 init_tile_config (&cfg);
52 init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf);
53 init_tile_reg_and_src (2, src1);
54 init_tile_reg_and_src (3, src2);
55
56 calc_matrix_dpbuud (&dst, &src1, &src2);
57 _tile_dpbuud (1, 2, 3);
58 _tile_stored (1, dst_ref.buf, _STRIDE);
59
60 if (!check_tile_register (&dst_ref, &dst))
61 abort();
62 }