1  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
       2  /* { dg-options "-O2 -ftree-vectorize --save-temps" } */
       3  
       4  #include <stdint.h>
       5  
       /* Overridable test parameters.  Each macro is only defined here when it
          has not already been defined, so an earlier definition (for example
          from a file that includes this one, or from the command line) wins.
            TYPE    - element type of every array; defaults to uint8_t.
            NAME(X) - wrapper applied to each function name; the default
                      expands to X unchanged.
            N       - iteration count of every loop; defaults to 1024.  */
       6  #ifndef TYPE
       7  #define TYPE uint8_t
       8  #endif
       9  
      10  #ifndef NAME
      11  #define NAME(X) X
      12  #endif
      13  
      14  #ifndef N
      15  #define N 1024
      16  #endif
      17  
      /* Two-way de-interleaving load.  Reads c[0 .. 2*N-1] as N consecutive
         pairs and stores the first member of each pair to a[i] and the
         second to b[i].  The __restrict qualifiers state that the arrays do
         not overlap, and the noinline/noclone attributes ask GCC not to
         inline or clone the function.  With the default TYPE this is
         presumably the loop the ld2b assembler scan at the end of the file
         is aimed at -- confirm against the generated code.  */
      18  void __attribute__ ((noinline, noclone))
      19  NAME(f2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c)
      20  {
      21    for (int i = 0; i < N; ++i)
      22      {
      23        a[i] = c[i * 2];
      24        b[i] = c[i * 2 + 1];
      25      }
      26  }
      27  
      /* Three-way de-interleaving load.  Reads d[0 .. 3*N-1] as N consecutive
         triples and distributes the three members of each triple to a[i],
         b[i] and c[i] respectively.  The __restrict qualifiers state that
         the arrays do not overlap, and the noinline/noclone attributes ask
         GCC not to inline or clone the function.  With the default TYPE
         this is presumably the loop the ld3b assembler scan at the end of
         the file is aimed at -- confirm against the generated code.  */
      28  void __attribute__ ((noinline, noclone))
      29  NAME(f3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      30  	  TYPE *__restrict d)
      31  {
      32    for (int i = 0; i < N; ++i)
      33      {
      34        a[i] = d[i * 3];
      35        b[i] = d[i * 3 + 1];
      36        c[i] = d[i * 3 + 2];
      37      }
      38  }
      39  
      /* Four-way de-interleaving load.  Reads e[0 .. 4*N-1] as N consecutive
         groups of four and distributes the members of each group to a[i],
         b[i], c[i] and d[i] respectively.  The __restrict qualifiers state
         that the arrays do not overlap, and the noinline/noclone attributes
         ask GCC not to inline or clone the function.  With the default TYPE
         this is presumably the loop the ld4b assembler scan at the end of
         the file is aimed at -- confirm against the generated code.  */
      40  void __attribute__ ((noinline, noclone))
      41  NAME(f4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      42  	  TYPE *__restrict d, TYPE *__restrict e)
      43  {
      44    for (int i = 0; i < N; ++i)
      45      {
      46        a[i] = e[i * 4];
      47        b[i] = e[i * 4 + 1];
      48        c[i] = e[i * 4 + 2];
      49        d[i] = e[i * 4 + 3];
      50      }
      51  }
      52  
      /* Two-way interleaving store, the mirror image of f2.  For each i in
         [0, N) writes a[i] to c[2*i] and b[i] to c[2*i + 1], so c receives
         the elements of a and b alternately.  The __restrict qualifiers
         state that the arrays do not overlap, and the noinline/noclone
         attributes ask GCC not to inline or clone the function.  With the
         default TYPE this is presumably the loop the st2b assembler scan at
         the end of the file is aimed at -- confirm against the generated
         code.  */
      53  void __attribute__ ((noinline, noclone))
      54  NAME(g2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c)
      55  {
      56    for (int i = 0; i < N; ++i)
      57      {
      58        c[i * 2] = a[i];
      59        c[i * 2 + 1] = b[i];
      60      }
      61  }
      62  
      /* Three-way interleaving store, the mirror image of f3.  For each i in
         [0, N) writes a[i], b[i] and c[i] to the consecutive locations
         d[3*i], d[3*i + 1] and d[3*i + 2].  The __restrict qualifiers state
         that the arrays do not overlap, and the noinline/noclone attributes
         ask GCC not to inline or clone the function.  With the default TYPE
         this is presumably the loop the st3b assembler scan at the end of
         the file is aimed at -- confirm against the generated code.  */
      63  void __attribute__ ((noinline, noclone))
      64  NAME(g3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      65  	  TYPE *__restrict d)
      66  {
      67    for (int i = 0; i < N; ++i)
      68      {
      69        d[i * 3] = a[i];
      70        d[i * 3 + 1] = b[i];
      71        d[i * 3 + 2] = c[i];
      72      }
      73  }
      74  
      /* Four-way interleaving store, the mirror image of f4.  For each i in
         [0, N) writes a[i], b[i], c[i] and d[i] to the consecutive locations
         e[4*i] .. e[4*i + 3].  The __restrict qualifiers state that the
         arrays do not overlap, and the noinline/noclone attributes ask GCC
         not to inline or clone the function.  With the default TYPE this is
         presumably the loop the st4b assembler scan at the end of the file
         is aimed at -- confirm against the generated code.  */
      75  void __attribute__ ((noinline, noclone))
      76  NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
      77  	  TYPE *__restrict d, TYPE *__restrict e)
      78  {
      79    for (int i = 0; i < N; ++i)
      80      {
      81        e[i * 4] = a[i];
      82        e[i * 4 + 1] = b[i];
      83        e[i * 4 + 2] = c[i];
      84        e[i * 4 + 3] = d[i];
      85      }
      86  }
      87  
      88  /* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
      89  /* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
      90  /* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
      91  /* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
      92  /* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
      93  /* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */