1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavxvnni " } */
       3  /* { dg-require-effective-target avxvnni } */
       4  
       5  #ifndef CHECK
       6  #define CHECK "avx-check.h"
       7  #endif
       8  
       9  #ifndef TEST
      10  #define TEST avx_test
      11  #endif
      12  
      13  #include CHECK
      14  
      15  static void
      16  CALC (int *r, int *dst, unsigned char *s1, char *s2, int size)
      17  {
      18    short tempres[32];
      19    for (int i = 0; i < size; i++) {
      20      tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i]));
      21    }
      22    for (int i = 0; i < size / 4; i++) {
      23      long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3];
      24      r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
      25    }
      26  }
      27  
      28  void
      29  TEST (void)
      30  {
      31    int i;
      32    union256i_d res_256;
      33    union256i_b src2_256;
      34    union256i_ub src1_256;
      35    int res_ref_256[8];
      36  
      37    if (!__builtin_cpu_supports ("avxvnni"))
      38      return;
      39  
      40    for (i = 0; i < 32; i++)
      41      {
      42        int sign = i % 2 ? 1 : -1;
      43        src1_256.a[i] = 10 + 3*i*i + sign;
      44        src2_256.a[i] = sign*10*i*i;
      45      }
      46  
      47    for (i = 0; i < 8; i++)
      48      res_256.a[i] = 0x7fffffff;
      49  
      50    CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
      51    res_256.x = _mm256_dpbusds_avx_epi32 (res_256.x, src1_256.x, src2_256.x);
      52    if (check_union256i_d (res_256, res_ref_256))
      53      abort ();
      54  
      55    union128i_d res_128;
      56    union128i_b src2_128;
      57    union128i_ub src1_128;
      58    int res_ref_128[4];
      59  
      60    for (i = 0; i < 16; i++)
      61      {
      62        int sign = i % 2 ? 1 : -1;
      63        src1_128.a[i] = 10 + 3*i*i + sign;
      64        src2_128.a[i] = sign*10*i*i;
      65      }
      66  
      67    for (i = 0; i < 4; i++)
      68      res_128.a[i] = 0x7fffffff;
      69  
      70    CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
      71    res_128.x = _mm_dpbusds_avx_epi32 (res_128.x, src1_128.x, src2_128.x);
      72    if (check_union128i_d (res_128, res_ref_128))
      73      abort ();
      74  }