1  /* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
       2  
       3  #include <stdint.h>
       4  
       5  void
       6  f1 (uint64_t *restrict dst, uint16_t *src1, uint8_t *src2, uint32_t *index)
       7  {
       8    for (int i = 0; i < 7; ++i)
       9      dst[i] += (uint16_t) (src1[i] + src2[index[i]]);
      10  }
      11  
      12  void
      13  f2 (uint64_t *restrict dst, uint16_t *src1, uint8_t *src2, uint64_t *index)
      14  {
      15    for (int i = 0; i < 7; ++i)
      16      dst[i] += (uint16_t) (src1[i] + src2[index[i]]);
      17  }
      18  
      19  void
      20  f3 (uint64_t *restrict dst, uint16_t *src1, uint8_t **src2)
      21  {
      22    for (int i = 0; i < 7; ++i)
      23      dst[i] += (uint16_t) (src1[i] + *src2[i]);
      24  }
      25  
      26  /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d, p[0-7]/z, \[x2, z[0-9]+\.d\]\n} 2 } } */
      27  /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]\n} 1 } } */
      28  /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d, p[0-7]/z, \[x1\]\n} 3 } } */
      29  /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x3\]\n} 1 } } */
      30  /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0\]\n} 3 } } */
      31  /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x2\]\n} 1 } } */
      32  /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x3\]\n} 1 } } */
      33  
      34  /* { dg-final { scan-assembler-times {\tadd\tz} 6 } } */
      35  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
      36  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
      37  
      38  /* { dg-final { scan-assembler-times {\tuxt.\t} 3 } } */
      39  /* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 3 } } */