1  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
       2  /* { dg-options "-O2 -ftree-vectorize --save-temps" } */
       3  
       4  #define INDEX8 uint8_t
       5  #define INDEX16 uint16_t
       6  #define INDEX32 uint32_t
       7  #define INDEX64 uint64_t
       8  
       9  #include "strided_store_1.c"
      10  
      11  /* 8 and 16 bits are signed because the multiplication promotes to int.
      12     Using uxtw for all 9 would be OK.  */
      13  /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
      14  /* The 32-bit loop needs to honor the defined overflow in uint32_t,
      15     so we vectorize the offset calculation.  This means that the
      16     64-bit version needs two copies.  */
      17  /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
      18  /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */