(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
avx5124vnniw-vp4dpwssd-2.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavx5124vnniw" } */
       3  /* { dg-require-effective-target avx5124vnniw } */
       4  
       5  #define AVX5124VNNIW
       6  #include "avx512f-helper.h"
       7  
       8  #define SIZE (AVX512F_LEN / 32)
       9  
      10  #include "avx512f-mask-type.h"
      11  
      12  void
      13  CALC (short *src1, short* src2, short *src3,
      14        short *src4, int* prev_dst, short *mult, int *dst)
      15  {
      16    int i;
      17  
      18    for (i = 0; i < SIZE; i++)
      19      {
      20        int p1dword, p2dword;
      21        dst[i] = prev_dst[i];
      22        p1dword = (int)(src1[2*i  ]) * (int)(mult[0]);
      23        p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
      24        dst[i] += p1dword + p2dword;
      25  
      26        p1dword = (int)(src2[2*i  ]) * (int)(mult[2]);
      27        p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
      28        dst[i] += p1dword + p2dword;
      29  
      30        p1dword = (int)(src3[2*i  ]) * (int)(mult[4]);
      31        p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
      32        dst[i] += p1dword + p2dword;
      33  
      34        p1dword = (int)(src4[2*i  ]) * (int)(mult[6]);
      35        p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
      36        dst[i] += p1dword + p2dword;
      37      }
      38  }
      39  
      40  void
      41  TEST (void)
      42  {
      43    int i;
      44    UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
      45    UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
      46    UNION_TYPE (128, i_w) mult;
      47    MASK_TYPE mask = MASK_VALUE;
      48    int res_ref[SIZE];
      49  
      50    for (i = 0; i < SIZE * 2; i++)
      51      {
      52        src1.a[i] = 2 + 7 * i % 291;
      53        src2.a[i] = 3 + 11 * (i % 377) * i;
      54        src3.a[i] = src1.a[i] * src1.a[i];
      55        src4.a[i] = src2.a[i] * src2.a[i];
      56      }
      57    for (i = 0; i < 8; i++)
      58      mult.a[i] = 3 + i * 2;
      59  
      60    for (i = 0; i < SIZE; i++)
      61      src5.a[i] = DEFAULT_VALUE;
      62  
      63    CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
      64  
      65    res1.x = INTRINSIC (_4dpwssd_epi32)       (      src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
      66    res2.x = INTRINSIC (_mask_4dpwssd_epi32)  (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
      67    res3.x = INTRINSIC (_maskz_4dpwssd_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
      68  
      69    if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
      70      abort ();
      71  
      72    MASK_MERGE (i_d) (res_ref, mask, SIZE);
      73    if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
      74      abort ();
      75  
      76    MASK_ZERO (i_d) (res_ref, mask, SIZE);
      77    if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
      78      abort ();
      79  }