1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mprefer-vector-width=512 -mavx512vl -mavx512bw" } */
       3  /* { dg-require-effective-target avx512bw } */
       4  /* { dg-require-effective-target avx512vl } */
       5  
       6  #include "pr98434-1.c"
       7  void test (void);
       8  #define DO_TEST test
       9  #define AVX512VL
      10  #define AVX512BW
      11  #include "avx512-check.h"
      12  
      13  
      14  typedef char int8;
      15  typedef unsigned char uint8;
      16  typedef short int16;
      17  typedef unsigned short uint16;
      18  typedef long long int64;
      19  typedef unsigned long long uint64;
      20  
      21  #define F_EMULATE(TYPE, SIZE, OP, NAME)		\
      22    __attribute__((noipa, optimize("-fno-tree-vectorize"))) void	\
      23    emulate_##SIZE##_##TYPE##_##NAME (TYPE *a,	\
      24  				    TYPE *b,	\
      25  				    TYPE *c)	\
      26    {						\
      27      int i;					\
      28      for (i = 0; i < SIZE; i++)			\
      29        {						\
      30  	a[i] = b[i] OP c[i];			\
      31        }						\
      32    }
      33  
      34  F_EMULATE (int8,  8, <<, vashl);
      35  F_EMULATE (int8,  8,  >>, vashr);
      36  F_EMULATE (uint8,  8, >>, vlshr);
      37  F_EMULATE (int8,  16, <<, vashl);
      38  F_EMULATE (int8,  16, >>, vashr);
      39  F_EMULATE (uint8,  16, >>, vlshr);
      40  F_EMULATE (int8,  32, <<, vashl);
      41  F_EMULATE (int8,  32, >>, vashr);
      42  F_EMULATE (uint8,  32, >>, vlshr);
      43  F_EMULATE (int16,  8, <<, vashl);
      44  F_EMULATE (int16,  8, >>, vashr);
      45  F_EMULATE (uint16, 8, >>, vlshr);
      46  F_EMULATE (int16,  16, <<, vashl);
      47  F_EMULATE (int16,  16, >>, vashr);
      48  F_EMULATE (uint16, 16, >>, vlshr);
      49  F_EMULATE (int16,  32, <<, vashl);
      50  F_EMULATE (int16,  32, >>, vashr);
      51  F_EMULATE (uint16, 32, >>, vlshr);
      52  F_EMULATE (int64,  2, <<, vashl);
      53  F_EMULATE (int64,  2, >>, vashr);
      54  F_EMULATE (uint64,  2, >>, vlshr);
      55  F_EMULATE (int64,  4, <<, vashl);
      56  F_EMULATE (int64,  4, >>, vashr);
      57  F_EMULATE (uint64,  4, >>, vlshr);
      58  F_EMULATE (int64,  8, <<, vashl);
      59  F_EMULATE (int64,  8, >>, vashr);
      60  F_EMULATE (uint64,  8, >>, vlshr);
      61  
      62  #define VSHIFT(VTYPE, NAME, src1, src2)	\
      63    foo_##VTYPE##_##NAME (src1, src2)
      64  
      65  #define EMULATE(SIZE, TYPE, NAME, dst, src1, src2)	\
      66    emulate_##SIZE##_##TYPE##_##NAME (dst, src1, src2)
      67  
      68  #define F_TEST_SHIFT(VTYPE, VTYPEU, TYPE, TYPEU, SIZE)    \
      69    __attribute__((noipa, optimize("-fno-tree-vectorize"))) void \
      70    test_##VTYPE ()\
      71    {\
      72      TYPE src1[SIZE], src2[SIZE], ref[SIZE];		\
      73      TYPEU usrc1[SIZE], usrc2[SIZE], uref[SIZE];			\
      74      VTYPE dst;	     \
      75      VTYPEU udst;     \
      76      int i;\
      77      for (i = 0; i < SIZE; i++)\
      78      {\
      79        dst[i] = ref[i] = -i; \
      80        src1[i] = -(i + SIZE);			\
      81        src2[i] = i % 8;			\
      82        udst[i] = uref[i] = i;			\
      83        usrc1[i] = (i + SIZE);			\
      84        usrc2[i] = (i % 8);			\
      85      }\
      86      EMULATE(SIZE, TYPE, vashl, ref, src1, src2);	\
      87      dst = VSHIFT(VTYPE, vashl, *((VTYPE* )&src1[0]), *((VTYPE*) &src2[0])); \
      88      for (i = 0; i < SIZE; i++)\
      89      {\
      90        if(dst[i] != ref[i]) __builtin_abort();\
      91      }\
      92      EMULATE(SIZE, TYPE, vashr, ref, src1, src2);	\
      93      dst = VSHIFT(VTYPE, vashr, *((VTYPE* )&src1[0]), *((VTYPE*) &src2[0])); \
      94      for (i = 0; i < SIZE; i++)\
      95      {\
      96        if(dst[i] != ref[i]) __builtin_abort();\
      97      }\
      98      EMULATE(SIZE, TYPEU, vlshr, uref, usrc1, usrc2);	\
      99      udst = VSHIFT(VTYPEU, vlshr, *((VTYPEU* )&usrc1[0]), *((VTYPEU*) &usrc2[0])); \
     100      for (i = 0; i < SIZE; i++)\
     101      {\
     102        if(udst[i] != uref[i]) __builtin_abort();\
     103      }\
     104    }
     105  
     106  F_TEST_SHIFT (v8qi, v8uqi, int8, uint8, 8);
     107  F_TEST_SHIFT (v16qi, v16uqi, int8, uint8, 16);
     108  F_TEST_SHIFT (v32qi, v32uqi, int8, uint8, 32);
     109  F_TEST_SHIFT (v8hi, v8uhi, int16, uint16, 8);
     110  F_TEST_SHIFT (v16hi, v16uhi, int16, uint16, 16);
     111  F_TEST_SHIFT (v32hi, v32uhi, int16, uint16, 32);
     112  F_TEST_SHIFT (v2di, v2udi, int64, uint64, 2);
     113  F_TEST_SHIFT (v4di, v4udi, int64, uint64, 4);
     114  F_TEST_SHIFT (v8di, v8udi, int64, uint64, 8);
     115  
     116  
     117  void
     118  test (void)
     119  {
     120    test_v8qi ();
     121    test_v16qi ();
     122    test_v32qi ();
     123    test_v8hi ();
     124    test_v16hi ();
     125    test_v32hi ();
     126    test_v2di ();
     127    test_v4di ();
     128    test_v8di ();
     129  }