(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
sve/
reduc_2.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
       3  
       4  #include <stdint.h>
       5  
       6  #define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
       7  
       8  #define DEF_REDUC_PLUS(TYPE)					\
       9  void __attribute__ ((noinline, noclone))			\
      10  reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],	\
      11  		   TYPE *restrict r, int n)			\
      12  {								\
      13    for (int i = 0; i < n; i++)					\
      14      {								\
      15        r[i] = 0;							\
      16        for (int j = 0; j < NUM_ELEMS (TYPE); j++)		\
      17          r[i] += a[i][j];					\
      18      }								\
      19  }
      20  
      21  #define TEST_PLUS(T)				\
      22    T (int8_t)					\
      23    T (int16_t)					\
      24    T (int32_t)					\
      25    T (int64_t)					\
      26    T (uint8_t)					\
      27    T (uint16_t)					\
      28    T (uint32_t)					\
      29    T (uint64_t)					\
      30    T (_Float16)					\
      31    T (float)					\
      32    T (double)
      33  
      34  TEST_PLUS (DEF_REDUC_PLUS)
      35  
      36  #define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP)			\
      37  void __attribute__ ((noinline, noclone))			\
      38  reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],	\
      39  		       TYPE *restrict r, int n)			\
      40  {								\
      41    for (int i = 0; i < n; i++)					\
      42      {								\
      43        r[i] = a[i][0];						\
      44        for (int j = 0; j < NUM_ELEMS (TYPE); j++)		\
      45          r[i] = a[i][j] CMP_OP r[i] ? a[i][j] : r[i];		\
      46      }								\
      47  }
      48  
      49  #define TEST_MAXMIN(T)				\
      50    T (int8_t, max, >)				\
      51    T (int16_t, max, >)				\
      52    T (int32_t, max, >)				\
      53    T (int64_t, max, >)				\
      54    T (uint8_t, max, >)				\
      55    T (uint16_t, max, >)				\
      56    T (uint32_t, max, >)				\
      57    T (uint64_t, max, >)				\
      58    T (_Float16, max, >)				\
      59    T (float, max, >)				\
      60    T (double, max, >)				\
      61  						\
      62    T (int8_t, min, <)				\
      63    T (int16_t, min, <)				\
      64    T (int32_t, min, <)				\
      65    T (int64_t, min, <)				\
      66    T (uint8_t, min, <)				\
      67    T (uint16_t, min, <)				\
      68    T (uint32_t, min, <)				\
      69    T (uint64_t, min, <)				\
      70    T (_Float16, min, <)				\
      71    T (float, min, <)				\
      72    T (double, min, <)
      73  
      74  TEST_MAXMIN (DEF_REDUC_MAXMIN)
      75  
      76  #define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP)			\
      77  void __attribute__ ((noinline, noclone))			\
      78  reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)],	\
      79  		    TYPE *restrict r, int n)			\
      80  {								\
      81    for (int i = 0; i < n; i++)					\
      82      {								\
      83        r[i] = a[i][0];						\
      84        for (int j = 0; j < NUM_ELEMS(TYPE); j++)			\
      85          r[i] BIT_OP a[i][j];					\
      86      }								\
      87  }
      88  
      89  #define TEST_BITWISE(T)				\
      90    T (int8_t, and, &=)				\
      91    T (int16_t, and, &=)				\
      92    T (int32_t, and, &=)				\
      93    T (int64_t, and, &=)				\
      94    T (uint8_t, and, &=)				\
      95    T (uint16_t, and, &=)				\
      96    T (uint32_t, and, &=)				\
      97    T (uint64_t, and, &=)				\
      98  						\
      99    T (int8_t, ior, |=)				\
     100    T (int16_t, ior, |=)				\
     101    T (int32_t, ior, |=)				\
     102    T (int64_t, ior, |=)				\
     103    T (uint8_t, ior, |=)				\
     104    T (uint16_t, ior, |=)				\
     105    T (uint32_t, ior, |=)				\
     106    T (uint64_t, ior, |=)				\
     107  						\
     108    T (int8_t, xor, ^=)				\
     109    T (int16_t, xor, ^=)				\
     110    T (int32_t, xor, ^=)				\
     111    T (int64_t, xor, ^=)				\
     112    T (uint8_t, xor, ^=)				\
     113    T (uint16_t, xor, ^=)				\
     114    T (uint32_t, xor, ^=)				\
     115    T (uint64_t, xor, ^=)
     116  
     117  TEST_BITWISE (DEF_REDUC_BITWISE)
     118  
     119  /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
     120  /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
     121  /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
     122  /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
     123  /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     124  /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     125  /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     126  
     127  /* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
     128  /* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     129  /* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     130  /* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     131  /* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
     132  /* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     133  /* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     134  /* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     135  /* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     136  /* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     137  /* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     138  
     139  /* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
     140  /* { dg-final { scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     141  /* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     142  /* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     143  /* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
     144  /* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     145  /* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     146  /* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     147  /* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     148  /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     149  /* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
     150  
     151  /* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
     152  /* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
     153  /* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
     154  /* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
     155  
     156  /* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
     157  /* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
     158  /* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
     159  /* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
     160  
     161  /* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
     162  /* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
     163  /* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
     164  /* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */