(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
sve/
mask_struct_load_3.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
       3  
       4  #include <stdint.h>
       5  
       /* Define a function NAME_4, marked noinline and noclone, taking
          __restrict-qualified pointers DEST (OUTTYPE), SRC (INTYPE) and
          COND (MASKTYPE) plus an element count N.  For every I in [0, N)
          for which COND[I] is nonzero, DEST[I] is set to the sum of the
          four consecutive SRC elements starting at SRC[I * 4]; DEST[I] is
          not written when COND[I] is zero.

          The dg-final directives in this file count the ld4b, ld4h, ld4w
          and ld4d instructions in the generated assembly.
          NOTE(review): presumably the loop has to keep this shape for those
          counts to hold -- confirm before restructuring it.  */
       6  #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE)		\
       7    void __attribute__ ((noinline, noclone))			\
       8    NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src,	\
       9  	    MASKTYPE *__restrict cond, intptr_t n)		\
      10    {								\
      11      for (intptr_t i = 0; i < n; ++i)				\
      12        if (cond[i])						\
      13  	dest[i] = (src[i * 4]					\
      14  		   + src[i * 4 + 1]				\
      15  		   + src[i * 4 + 2]				\
      16  		   + src[i * 4 + 3]);				\
      17    }
      18  
      /* Instantiate TEST_LOOP four times for one OUTTYPE/INTYPE pair, once
         per mask element type: int8_t, uint16_t, float and double.  The
         suffix _i8, _i16, _f32 or _f64 is appended to NAME to identify the
         mask type.  */
      19  #define TEST2(NAME, OUTTYPE, INTYPE) \
      20    TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
      21    TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
      22    TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
      23    TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
      24  
      /* Instantiate TEST2 four times for one OUTTYPE, once per integer
         input element type: int8_t, uint16_t, int32_t and uint64_t.  The
         suffix _i8, _i16, _i32 or _i64 is appended to NAME to identify the
         input type.  Each use therefore expands to 4 * 4 = 16 TEST_LOOP
         functions.  */
      25  #define TEST1(NAME, OUTTYPE) \
      26    TEST2 (NAME##_i8, OUTTYPE, int8_t) \
      27    TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
      28    TEST2 (NAME##_i32, OUTTYPE, int32_t) \
      29    TEST2 (NAME##_i64, OUTTYPE, uint64_t)
      30  
      /* Instantiate every type combination covered by this test:
         - via TEST1, each of the integer output types int8_t, uint16_t,
           int32_t and uint64_t combined with every integer input type and
           every mask type (4 * 16 = 64 functions);
         - via TEST2, the floating-point cases in which output and input
           share the same type (_Float16, float, double), combined with
           every mask type (3 * 4 = 12 functions).
         The generated functions are named NAME_<out>_<in>_<mask>_4.  */
      31  #define TEST(NAME) \
      32    TEST1 (NAME##_i8, int8_t) \
      33    TEST1 (NAME##_i16, uint16_t) \
      34    TEST1 (NAME##_i32, int32_t) \
      35    TEST1 (NAME##_i64, uint64_t) \
      36    TEST2 (NAME##_f16_f16, _Float16, _Float16) \
      37    TEST2 (NAME##_f32_f32, float, float) \
      38    TEST2 (NAME##_f64_f64, double, double)
      39  
      /* Emit the full set of functions with the name prefix "test", e.g.
         test_i8_i8_i8_4 and test_f16_f16_f64_4.  */
      40  TEST (test)
      41  
      42  /*    Mask |  8 16 32 64
      43      -------+------------
      44      Out  8 |  1  1  1  1
      45          16 |  1  1  1  1
      46          32 |  1  1  1  1
      47          64 |  1  1  1  1.  */
      48  /* { dg-final { scan-assembler-times {\tld4b\t.z[0-9]} 16 } } */
      49  
      50  /*    Mask |  8 16 32 64
      51      -------+------------
      52      Out  8 |  2  2  2  2
      53          16 |  2  1  1  1 x2 (for half float)
      54          32 |  2  1  1  1
      55          64 |  2  1  1  1.  */
      56  /* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */
      57  
      58  /*    Mask |  8 16 32 64
      59      -------+------------
      60      Out  8 |  4  4  4  4
      61          16 |  4  2  2  2
      62          32 |  4  2  1  1 x2 (for float)
      63          64 |  4  2  1  1.  */
      64  /* { dg-final { scan-assembler-times {\tld4w\t.z[0-9]} 50 } } */
      65  
      66  /*    Mask |  8 16 32 64
      67      -------+------------
      68      Out  8 |  8  8  8  8
      69          16 |  8  4  4  4
      70          32 |  8  4  2  2
      71          64 |  8  4  2  1 x2 (for double).  */
      72  /* { dg-final { scan-assembler-times {\tld4d\t.z[0-9]} 98 } } */