1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
       3  
       4  #include <stdint.h>
       5  
       6  #define add(A, B) ((A) + (B))
       7  #define sub(A, B) ((A) - (B))
       8  #define mul(A, B) ((A) * (B))
       9  #define div(A, B) ((A) / (B))
      10  #define max(A, B) ((A) > (B) ? (A) : (B))
      11  #define min(A, B) ((A) < (B) ? (A) : (B))
      12  #define and(A, B) ((A) & (B))
      13  #define ior(A, B) ((A) | (B))
      14  #define xor(A, B) ((A) ^ (B))
      15  
      16  #define DEF_LOOP(TYPE, CMPTYPE, OP)				\
      17    void __attribute__((noipa))					\
      18    f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,	\
      19  		   CMPTYPE limit, TYPE *restrict src1,		\
      20  		   TYPE *restrict src2, unsigned int n)		\
      21    {								\
      22      for (unsigned int i = 0; i < n; ++i)			\
      23        {								\
      24  	TYPE truev = OP (src1[i], src2[i]);			\
      25  	dest[i] = cond[i] < limit ? truev : src2[i];		\
      26        }								\
      27    }
      28  
      29  #define FOR_EACH_INT_TYPE(T, TYPE) \
      30    T (TYPE, TYPE, add) \
      31    T (TYPE, TYPE, sub) \
      32    T (TYPE, TYPE, mul) \
      33    T (TYPE, TYPE, max) \
      34    T (TYPE, TYPE, min) \
      35    T (TYPE, TYPE, and) \
      36    T (TYPE, TYPE, ior) \
      37    T (TYPE, TYPE, xor)
      38  
      39  #define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
      40    T (TYPE, CMPTYPE, add) \
      41    T (TYPE, CMPTYPE, sub) \
      42    T (TYPE, CMPTYPE, mul) \
      43    T (TYPE, CMPTYPE, div) \
      44    T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
      45    T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
      46  
      47  #define FOR_EACH_LOOP(T) \
      48    FOR_EACH_INT_TYPE (T, int8_t) \
      49    FOR_EACH_INT_TYPE (T, int16_t) \
      50    FOR_EACH_INT_TYPE (T, int32_t) \
      51    FOR_EACH_INT_TYPE (T, int64_t) \
      52    FOR_EACH_INT_TYPE (T, uint8_t) \
      53    FOR_EACH_INT_TYPE (T, uint16_t) \
      54    FOR_EACH_INT_TYPE (T, uint32_t) \
      55    FOR_EACH_INT_TYPE (T, uint64_t) \
      56    FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
      57    FOR_EACH_FP_TYPE (T, float, float, f32) \
      58    FOR_EACH_FP_TYPE (T, double, double, f64)
      59  
/* Emit the function definitions themselves: DEF_LOOP is expanded once for
   every (type, operation) combination enumerated by FOR_EACH_LOOP.  */
FOR_EACH_LOOP (DEF_LOOP)
      61  
      62  /* { dg-final { scan-assembler-not {\tsel\t} } } */
      63  /* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
      64  
      65  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
      66  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
      67  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
      68  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
      69  
      70  /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
      71  /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
      72  /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
      73  /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
      74  
      75  /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
      76  /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
      77  /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
      78  /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
      79  
      80  /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
      81  /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
      82  /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
      83  /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
      84  
      85  /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
      86  /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
      87  /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
      88  /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
      89  
      90  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
      91  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
      92  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
      93  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
      94  
      95  /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
      96  /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
      97  /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
      98  /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
      99  
     100  /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
     101  /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
     102  /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
     103  /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
     104  
     105  /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
     106  /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
     107  /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
     108  /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
     109  
     110  /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
     111  /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
     112  /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
     113  /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
     114  
     115  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     116  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     117  /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
     118  
     119  /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     120  /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     121  /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
     122  
     123  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     124  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     125  /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
     126  
     127  /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     128  /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     129  /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
     130  
     131  /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     132  /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     133  /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
     134  
     135  /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
     136  /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
     137  /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */