/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */

       4  #include <stdint.h>
       5  
       6  #ifndef TYPE
       7  #define TYPE uint8_t
       8  #define ITYPE int8_t
       9  #endif
      10  
      11  void __attribute__ ((noinline, noclone))
      12  f2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
      13  {
      14    for (ITYPE i = 0; i < n; ++i)
      15      {
      16        a[i] = c[i * 2];
      17        b[i] = c[i * 2 + 1];
      18      }
      19  }
      20  
      21  void __attribute__ ((noinline, noclone))
      22  f3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      23      TYPE *__restrict d, ITYPE n)
      24  {
      25    for (ITYPE i = 0; i < n; ++i)
      26      {
      27        a[i] = d[i * 3];
      28        b[i] = d[i * 3 + 1];
      29        c[i] = d[i * 3 + 2];
      30      }
      31  }
      32  
      33  void __attribute__ ((noinline, noclone))
      34  f4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      35      TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
      36  {
      37    for (ITYPE i = 0; i < n; ++i)
      38      {
      39        a[i] = e[i * 4];
      40        b[i] = e[i * 4 + 1];
      41        c[i] = e[i * 4 + 2];
      42        d[i] = e[i * 4 + 3];
      43      }
      44  }
      45  
      46  void __attribute__ ((noinline, noclone))
      47  g2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
      48  {
      49    for (ITYPE i = 0; i < n; ++i)
      50      {
      51        c[i * 2] = a[i];
      52        c[i * 2 + 1] = b[i];
      53      }
      54  }
      55  
      56  void __attribute__ ((noinline, noclone))
      57  g3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      58      TYPE *__restrict d, ITYPE n)
      59  {
      60    for (ITYPE i = 0; i < n; ++i)
      61      {
      62        d[i * 3] = a[i];
      63        d[i * 3 + 1] = b[i];
      64        d[i * 3 + 2] = c[i];
      65      }
      66  }
      67  
      68  void __attribute__ ((noinline, noclone))
      69  g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      70      TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
      71  {
      72    for (ITYPE i = 0; i < n; ++i)
      73      {
      74        e[i * 4] = a[i];
      75        e[i * 4 + 1] = b[i];
      76        e[i * 4 + 2] = c[i];
      77        e[i * 4 + 3] = d[i];
      78      }
      79  }
      80  
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */