/* { dg-options "-O2 -ftree-vectorize" } */

       3  #include <stdint.h>
       4  
       5  #define TEST_LOOP(TYPE1, TYPE2)						\
       6    void									\
       7    f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1,	\
       8  		       TYPE2 *restrict src2, uint32_t *restrict index,	\
       9  		       int n)						\
      10    {									\
      11      for (int i = 0; i < n; ++i)						\
      12        dst[i] += src1[i] + src2[index[i]];				\
      13    }
      14  
      15  #define TEST_ALL(T) \
      16    T (uint16_t, uint8_t) \
      17    T (uint32_t, uint8_t) \
      18    T (uint64_t, uint8_t) \
      19    T (uint32_t, uint16_t) \
      20    T (uint64_t, uint16_t) \
      21    T (uint64_t, uint32_t)
      22  
      23  TEST_ALL (TEST_LOOP)

/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 1\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 1\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 2\]\n} 1 } } */

/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 7 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */

/* { dg-final { scan-assembler-not {\tuxt.\t} } } */