(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
avx512fp16-helper.h
/* This file is used for emulation of avx512fp16 runtime tests.  To
   verify the correctness of _Float16 type calculation, the idea is to
   convert _Float16 to float and do the emulation using float instructions.
   The _Float16 type should not be emulated or checked by itself.  */
       5  
       6  #include "avx512f-helper.h"
       7  #ifndef AVX512FP16_HELPER_INCLUDED
       8  #define AVX512FP16_HELPER_INCLUDED
       9  
      10  #ifdef DEBUG
      11  #include <string.h>
      12  #endif
      13  #include <math.h>
      14  #include <limits.h>
      15  #include <float.h>
      16  
      17  /* Useful macros.  */
      18  #define NOINLINE __attribute__((noinline,noclone))
      19  #define _ROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
      20  #define _ROUND_CUR 8 
      21  #define AVX512F_MAX_ELEM 512 / 32
      22  
/* Structure for _Float16 emulation.

   Every member overlays the same storage, so a value written through
   one view (for example the packed vector zmmh) can be read back
   through another (for example the raw 16-bit patterns in u16).  */
typedef union
{
  /* Whole-register vector views.  */
  __m512          zmm;
  __m512h         zmmh;
  __m512i         zmmi;
  __m512d         zmmd;
  /* Two-element array views; index 0 is used by the 256-bit macros.  */
  __m256          ymm[2];
  __m256h         ymmh[2];
  __m256i         ymmi[2];
  __m256d         ymmd[2];
  /* Four-element array views; index 0 is used by the 128-bit macros.  */
  __m128h         xmmh[4];
  __m128	  xmm[4];
  __m128i	  xmmi[4];
  __m128d	  xmmd[4];
  /* Scalar element views: raw bit patterns and numeric values.  */
  unsigned short  u16[32];
  unsigned int    u32[16];
  int		  i32[16];
  long long	  s64[8];
  unsigned long long u64[8];
  double          f64[8];
  float           f32[16];
  _Float16        f16[32];
} V512;
      47  
/* Global variables.  */
/* Shared test operands.  init_src fills all four (src1/src2 with packed
   half-precision data, src3 through its u32 view, src3f through its f64
   view); init_src_nanf fills src1, src2 and src3 only.  */
V512 src1, src2, src3, src3f;
/* Running failure count; bumped by check_results and
   check_results_mask on each mismatch they report.  */
int n_errs = 0;
      51  
      52  /* Helper function for packing/unpacking ph operands. */
      53  void NOINLINE 
      54  unpack_ph_2twops(V512 src, V512 *op1, V512 *op2)
      55  {
      56      V512 v1;
      57  
      58      op1->zmm = _mm512_cvtph_ps(src.ymmi[0]);
      59      v1.ymm[0] = _mm512_extractf32x8_ps(src.zmm, 1);
      60      op2->zmm = _mm512_cvtph_ps(v1.ymmi[0]);
      61  }
      62  
      63  V512 NOINLINE
      64  pack_twops_2ph(V512 op1, V512 op2)
      65  {
      66      V512 v1, v2, v3;
      67  
      68      v1.ymmi[0] = _mm512_cvtps_ph(op1.zmm, _MM_FROUND_TO_NEAREST_INT);
      69      v2.ymmi[0] = _mm512_cvtps_ph(op2.zmm, _MM_FROUND_TO_NEAREST_INT);
      70  
      71      v3.zmm = _mm512_insertf32x8(v1.zmm, v2.ymm[0], 1);
      72  
      73      return v3;
      74  }
      75  
      76  /* Helper function used for result debugging */
      77  #ifdef DEBUG
      78  void NOINLINE
      79  display_ps(const void *p, const char *banner, int n_elems)
      80  {
      81      int i;
      82      V512 *v = (V512*)p;
      83  
      84      if (banner) {
      85          printf("%s", banner);
      86      }
      87  
      88      for (i = 15; i >= n_elems; i--) {
      89          printf(" --------");
      90          if (i == 8) {
      91              printf("\n");
      92              if (banner) {
      93                  printf("%*s", (int)strlen(banner), "");
      94              }
      95          }
      96      }
      97  
      98      for (; i >= 0; i--) {
      99          printf(" %x", v->u32[i]);
     100          if (i == 8) {
     101              printf("\n");
     102              if (banner) {
     103                  printf("%*s", (int)strlen(banner), "");
     104              }
     105          }
     106      }
     107      printf("\n");
     108  }
     109  #endif
     110  
     111  /* Functions/macros used for init/result checking.
     112     Only check components within AVX512F_LEN.  */
     113  #define TO_STRING(x) #x
     114  #define STRINGIFY(x) TO_STRING(x)
     115  #define NAME_OF(NAME) STRINGIFY(INTRINSIC (NAME))
     116  
     117  #define CHECK_RESULT(res, exp, size, intrin) \
     118    check_results ((void*)res, (void*)exp, size,\
     119  		 NAME_OF(intrin))
     120  
     121  #define CHECK_RESULT_MASK(res, exp, size, intrin) \
     122    check_results_mask ((__mmask32)res, (__mmask32)exp, size,\
     123  		 NAME_OF(intrin))
     124  
     125  /* To evaluate whether result match _Float16 precision,
     126     only the last bit of real/emulate result could be
     127     different.  */
     128  void NOINLINE
     129  check_results(void *got, void *exp, int n_elems, char *banner)
     130  {
     131      int i;
     132      V512 *v1 = (V512*)got;
     133      V512 *v2 = (V512*)exp;
     134  
     135      for (i = 0; i < n_elems; i++) {
     136          if (v1->u16[i] != v2->u16[i] &&
     137              ((v1->u16[i] > (v2->u16[i] + 1)) ||
     138               (v1->u16[i] < (v2->u16[i] - 1)))) {
     139  
     140  #ifdef DEBUG
     141              printf("ERROR: %s failed at %d'th element: %x(%f) != %x(%f)\n",
     142                     banner ? banner : "", i,
     143                     v1->u16[i], *(float *)(&v1->u16[i]),
     144                     v2->u16[i], *(float *)(&v2->u16[i]));
     145              display_ps(got, "got:", n_elems);
     146              display_ps(exp, "exp:", n_elems);
     147  #endif
     148              n_errs++;
     149              break;
     150          }
     151      }
     152  }
     153  
     154  void NOINLINE
     155  check_results_mask(__mmask32 got, __mmask32 exp, int n_elems, char *banner)
     156  {
     157    if (got != exp) {
     158  #ifdef DEBUG
     159        printf("ERROR: %s failed : got mask %x != exp mask %x\n",
     160  	     banner ? banner : "", got, exp);
     161  #endif
     162        n_errs++;
     163    }
     164  }
     165  
     166  /* Functions for src/dest initialization */
     167  void NOINLINE
     168  init_src()
     169  {
     170      V512 v1, v2, v3, v4;
     171      int i;
     172  
     173      for (i = 0; i < AVX512F_MAX_ELEM; i++) {
     174  	v1.f32[i] = i + 1;
     175  	v2.f32[i] = (i + 2) * 0.5f;
     176  	v3.f32[i] = i * 1.5f;
     177  	v4.f32[i] = i - 1.5f;
     178  
     179  	src3.u32[i] = (i + 1) * 10;
     180      }
     181  
     182      for (i = 0; i < 8; i++) {
     183  	src3f.f64[i] = (i + 1) * 7.5;
     184      }
     185  
     186      src1 = pack_twops_2ph(v1, v2);
     187      src2 = pack_twops_2ph(v3, v4);
     188  }
     189  
     190  void NOINLINE
     191  init_src_nanf()
     192  {
     193    V512 v1, v2, v3, v4;
     194    int i;
     195  
     196    for (i = 0; i < 16; i++) {
     197      v1.f32[i] = i + 1 + 0.5;
     198      v2.f32[i] = i + 17 + 0.5;
     199      v3.f32[i] = i * 2 + 2 + 0.5;
     200      v4.f32[i] = i * 2 + 34 + 0.5;
     201  
     202      src3.u32[i] = (i + 1) * 10;
     203    }
     204  
     205    v1.f32[0] = __builtin_nanf("");
     206    src1 = pack_twops_2ph(v1, v2);
     207    src2 = pack_twops_2ph(v3, v4);
     208  }
     209  
     210  
     211  void NOINLINE
     212  init_dest(V512 * res, V512 * exp)
     213  {
     214      int i;
     215      V512 v1;
     216  
     217      for (i = 0; i < AVX512F_MAX_ELEM; i++) {
     218          v1.f32[i] = 12 + 0.5f * i;
     219      }
     220      *res = *exp = pack_twops_2ph(v1, v1);
     221  }
     222  
/* Expands to EVAL(emulate_, NAME, AVX512F_LEN).  EVAL is not defined in
   this file; presumably it pastes its arguments into a per-vector-length
   emulation function name -- confirm against avx512f-helper.h.  */
#define EMULATE(NAME) EVAL(emulate_, NAME, AVX512F_LEN)
     224  
     225  #endif /* AVX512FP16_HELPER_INCLUDED */
     226  
     227  /* Macros for AVX512VL Testing. Include V512 component usage
     228     and mask type for emulation. */
     229  
     230  #if AVX512F_LEN == 256
     231  #undef HF
     232  #undef SF
     233  #undef SI
     234  #undef DF
     235  #undef H_HF
     236  #undef NET_MASK 
     237  #undef NET_CMASK 
     238  #undef MASK_VALUE
     239  #undef HALF_MASK
     240  #undef ZMASK_VALUE 
     241  #define NET_MASK 0xffff
     242  #define NET_CMASK 0xff
     243  #define MASK_VALUE 0xcccc
     244  #define ZMASK_VALUE 0xfcc1
     245  #define HALF_MASK 0xcc
     246  #define HF(x) x.ymmh[0]
     247  #define H_HF(x) x.xmmh[0]
     248  #define SF(x) x.ymm[0]
     249  #define DF(x) x.ymmd[0]
     250  #define SI(x) x.ymmi[0]
     251  #elif AVX512F_LEN == 128
     252  #undef HF
     253  #undef SF
     254  #undef DF
     255  #undef SI
     256  #undef H_HF
     257  #undef NET_MASK 
     258  #undef NET_CMASK 
     259  #undef MASK_VALUE 
     260  #undef ZMASK_VALUE 
     261  #undef HALF_MASK
     262  #define NET_MASK 0xff
     263  #define NET_CMASK 0xff
     264  #define MASK_VALUE 0xcc
     265  #define HALF_MASK MASK_VALUE
     266  #define ZMASK_VALUE 0xc1
     267  #define HF(x) x.xmmh[0]
     268  #define SF(x) x.xmm[0]
     269  #define DF(x) x.xmmd[0]
     270  #define SI(x) x.xmmi[0]
     271  #define H_HF(x) x.xmmh[0]
     272  #else
     273  #define NET_MASK 0xffffffff
     274  #define NET_CMASK 0xffff
     275  #define MASK_VALUE 0xcccccccc
     276  #define ZMASK_VALUE 0xfcc1fcc1
     277  #define HALF_MASK 0xcccc
     278  #define HF(x) x.zmmh
     279  #define SF(x) x.zmm
     280  #define DF(x) x.zmmd
     281  #define SI(x) x.zmmi
     282  #define H_HF(x) x.ymmh[0]
     283  #endif
     284