1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize" } */
       3  
       4  #include <stdint.h>
       5  
       /* Define a function NAME that, for each i in [0, n), adds src[i * 4]
          to dest[i], where both pointers have element type TYPE.  SRC is
          therefore read with a stride of four elements while DEST is
          accessed contiguously.  Both pointers are restrict-qualified and
          the function is marked noinline and noclone -- presumably so that
          each loop is emitted exactly once, which the instruction counts
          checked at the end of this file rely on.  */
       6  #define TEST_LOOP(NAME, TYPE)					\
       7    void __attribute__ ((noinline, noclone))			\
       8    NAME (TYPE *restrict dest, TYPE *restrict src, int n)		\
       9    {								\
      10      for (int i = 0; i < n; ++i)					\
      11        dest[i] += src[i * 4];					\
      12    }
      13  
      /* Instantiate TEST_LOOP once per element type, forming each function
         name from NAME plus a type suffix: int8_t (_i8), uint16_t (_i16),
         float (_f32) and double (_f64).  Note that the _i16 variant uses an
         unsigned element type.  */
      14  #define TEST(NAME) \
      15    TEST_LOOP (NAME##_i8, int8_t) \
      16    TEST_LOOP (NAME##_i16, uint16_t) \
      17    TEST_LOOP (NAME##_f32, float) \
      18    TEST_LOOP (NAME##_f64, double)
      19  
      /* Emit the four functions under test: test_i8, test_i16, test_f32
         and test_f64.  */
      20  TEST (test)
      21  
      22  /* Check the vectorized loop.  */
      23  /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
      24  /* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */
      25  /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */
      26  /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */
      27  /* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */
      28  /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */
      29  /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
      30  /* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */
      31  /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */
      32  /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
      33  /* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */
      34  /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */
      35  
      36  /* Check the scalar tail.  */
      37  /* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */
      38  /* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */
      39  /* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */
      40  /* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */
      41  /* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */
      42  /* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */
      43  /* { dg-final { scan-assembler-times {\tldr\td} 2 } } */
      44  /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */
      45  
      46  /* Each function should have three branches: one directly to the exit
      47     (n <= 0), one to the single scalar epilogue iteration (n == 1),
      48     and one branch-back for the vectorized loop.  */
      49  /* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */