(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
sve/
mask_load_slp_1.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
       3  
       4  #include <stdint.h>
       5  
       6  #define MASK_SLP_2(TYPE_COND, ALT_VAL)					\
       7  void __attribute__ ((noinline, noclone))				\
       8  mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y,	\
       9  				    TYPE_COND *restrict z, int n)	\
      10  {									\
      11    for (int i = 0; i < n; i += 2)					\
      12      {									\
      13        x[i] = y[i] ? z[i] : 1;						\
      14        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
      15      }									\
      16  }
      17  
      18  #define MASK_SLP_4(TYPE_COND, ALT_VAL)					\
      19  void __attribute__ ((noinline, noclone))				\
      20  mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y,	\
      21  				    TYPE_COND *restrict z, int n)	\
      22  {									\
      23    for (int i = 0; i < n; i += 4)					\
      24      {									\
      25        x[i] = y[i] ? z[i] : 1;						\
      26        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
      27        x[i + 2] = y[i + 2] ? z[i + 2] : 1;				\
      28        x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;				\
      29      }									\
      30  }
      31  
      32  #define MASK_SLP_8(TYPE_COND, ALT_VAL)					\
      33  void __attribute__ ((noinline, noclone))				\
      34  mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y,	\
      35  				    TYPE_COND *restrict z, int n)	\
      36  {									\
      37    for (int i = 0; i < n; i += 8)					\
      38      {									\
      39        x[i] = y[i] ? z[i] : 1;						\
      40        x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
      41        x[i + 2] = y[i + 2] ? z[i + 2] : 1;				\
      42        x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;				\
      43        x[i + 4] = y[i + 4] ? z[i + 4] : 1;				\
      44        x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL;				\
      45        x[i + 6] = y[i + 6] ? z[i + 6] : 1;				\
      46        x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL;				\
      47      }									\
      48  }
      49  
      50  #define MASK_SLP_FAIL(TYPE_COND)					\
      51  void __attribute__ ((noinline, noclone))				\
      52  mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y,		\
      53  			     TYPE_COND *restrict z, int n)		\
      54  {									\
      55    for (int i = 0; i < n; i += 2)					\
      56      {									\
      57        x[i] = y[i] ? z[i] : 1;						\
      58        x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]];			\
      59      }									\
      60  }
      61  
      62  MASK_SLP_2(int8_t, 1)
      63  MASK_SLP_2(int8_t, 2)
      64  MASK_SLP_2(int, 1)
      65  MASK_SLP_2(int, 2)
      66  MASK_SLP_2(int64_t, 1)
      67  MASK_SLP_2(int64_t, 2)
      68  
      69  MASK_SLP_4(int8_t, 1)
      70  MASK_SLP_4(int8_t, 2)
      71  MASK_SLP_4(int, 1)
      72  MASK_SLP_4(int, 2)
      73  MASK_SLP_4(int64_t, 1)
      74  MASK_SLP_4(int64_t, 2)
      75  
      76  MASK_SLP_8(int8_t, 1)
      77  MASK_SLP_8(int8_t, 2)
      78  MASK_SLP_8(int, 1)
      79  MASK_SLP_8(int, 2)
      80  MASK_SLP_8(int64_t, 1)
      81  MASK_SLP_8(int64_t, 2)
      82  
      83  MASK_SLP_FAIL(int8_t)
      84  MASK_SLP_FAIL(int)
      85  MASK_SLP_FAIL(int64_t)
      86  
      87  /* { dg-final { scan-assembler-not {\tld2w\t} } } */
      88  /* { dg-final { scan-assembler-not {\tst2w\t} } } */
      89  /* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
      90  /* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */