(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
avx512f-reduce-op-1.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O2 -mavx512f" } */
       3  /* { dg-require-effective-target avx512f } */
       4  
       5  #define AVX512F
       6  
       7  #include "avx512f-helper.h"
       8  
       9  __attribute__((noinline, noclone)) int
      10  test_reduce_add_epi32 (__m512i a)
      11  {
      12    return _mm512_reduce_add_epi32 (a);
      13  }
      14  
      15  __attribute__((noinline, noclone)) int
      16  test_reduce_mul_epi32 (__m512i a)
      17  {
      18    return _mm512_reduce_mul_epi32 (a);
      19  }
      20  
      21  __attribute__((noinline, noclone)) int
      22  test_reduce_and_epi32 (__m512i a)
      23  {
      24    return _mm512_reduce_and_epi32 (a);
      25  }
      26  
      27  __attribute__((noinline, noclone)) int
      28  test_reduce_or_epi32 (__m512i a)
      29  {
      30    return _mm512_reduce_or_epi32 (a);
      31  }
      32  
      33  __attribute__((noinline, noclone)) int
      34  test_mask_reduce_add_epi32 (__mmask16 u, __m512i a)
      35  {
      36    return _mm512_mask_reduce_add_epi32 (u, a);
      37  }
      38  
      39  __attribute__((noinline, noclone)) int
      40  test_mask_reduce_mul_epi32 (__mmask16 u, __m512i a)
      41  {
      42    return _mm512_mask_reduce_mul_epi32 (u, a);
      43  }
      44  
      45  __attribute__((noinline, noclone)) int
      46  test_mask_reduce_and_epi32 (__mmask16 u, __m512i a)
      47  {
      48    return _mm512_mask_reduce_and_epi32 (u, a);
      49  }
      50  
      51  __attribute__((noinline, noclone)) int
      52  test_mask_reduce_or_epi32 (__mmask16 u, __m512i a)
      53  {
      54    return _mm512_mask_reduce_or_epi32 (u, a);
      55  }
      56  
      57  __attribute__((noinline, noclone)) int
      58  test_reduce_min_epi32 (__m512i a)
      59  {
      60    return _mm512_reduce_min_epi32 (a);
      61  }
      62  
      63  __attribute__((noinline, noclone)) int
      64  test_reduce_max_epi32 (__m512i a)
      65  {
      66    return _mm512_reduce_max_epi32 (a);
      67  }
      68  
      69  __attribute__((noinline, noclone)) unsigned int
      70  test_reduce_min_epu32 (__m512i a)
      71  {
      72    return _mm512_reduce_min_epu32 (a);
      73  }
      74  
      75  __attribute__((noinline, noclone)) unsigned int
      76  test_reduce_max_epu32 (__m512i a)
      77  {
      78    return _mm512_reduce_max_epu32 (a);
      79  }
      80  
      81  __attribute__((noinline, noclone)) int
      82  test_mask_reduce_min_epi32 (__mmask16 u, __m512i a)
      83  {
      84    return _mm512_mask_reduce_min_epi32 (u, a);
      85  }
      86  
      87  __attribute__((noinline, noclone)) int
      88  test_mask_reduce_max_epi32 (__mmask16 u, __m512i a)
      89  {
      90    return _mm512_mask_reduce_max_epi32 (u, a);
      91  }
      92  
      93  __attribute__((noinline, noclone)) unsigned int
      94  test_mask_reduce_min_epu32 (__mmask16 u, __m512i a)
      95  {
      96    return _mm512_mask_reduce_min_epu32 (u, a);
      97  }
      98  
      99  __attribute__((noinline, noclone)) unsigned int
     100  test_mask_reduce_max_epu32 (__mmask16 u, __m512i a)
     101  {
     102    return _mm512_mask_reduce_max_epu32 (u, a);
     103  }
     104  
     105  __attribute__((noinline, noclone)) float
     106  test_reduce_add_ps (__m512 a)
     107  {
     108    return _mm512_reduce_add_ps (a);
     109  }
     110  
     111  __attribute__((noinline, noclone)) float
     112  test_reduce_mul_ps (__m512 a)
     113  {
     114    return _mm512_reduce_mul_ps (a);
     115  }
     116  
     117  __attribute__((noinline, noclone)) float
     118  test_mask_reduce_add_ps (__mmask16 u, __m512 a)
     119  {
     120    return _mm512_mask_reduce_add_ps (u, a);
     121  }
     122  
     123  __attribute__((noinline, noclone)) float
     124  test_mask_reduce_mul_ps (__mmask16 u, __m512 a)
     125  {
     126    return _mm512_mask_reduce_mul_ps (u, a);
     127  }
     128  
     129  __attribute__((noinline, noclone)) float
     130  test_reduce_min_ps (__m512 a)
     131  {
     132    return _mm512_reduce_min_ps (a);
     133  }
     134  
     135  __attribute__((noinline, noclone)) float
     136  test_reduce_max_ps (__m512 a)
     137  {
     138    return _mm512_reduce_max_ps (a);
     139  }
     140  
     141  __attribute__((noinline, noclone)) float
     142  test_mask_reduce_min_ps (__mmask16 u, __m512 a)
     143  {
     144    return _mm512_mask_reduce_min_ps (u, a);
     145  }
     146  
     147  __attribute__((noinline, noclone)) float
     148  test_mask_reduce_max_ps (__mmask16 u, __m512 a)
     149  {
     150    return _mm512_mask_reduce_max_ps (u, a);
     151  }
     152  
     153  __attribute__((noinline, noclone)) long long
     154  test_reduce_add_epi64 (__m512i a)
     155  {
     156    return _mm512_reduce_add_epi64 (a);
     157  }
     158  
     159  __attribute__((noinline, noclone)) long long
     160  test_reduce_mul_epi64 (__m512i a)
     161  {
     162    return _mm512_reduce_mul_epi64 (a);
     163  }
     164  
     165  __attribute__((noinline, noclone)) long long
     166  test_reduce_and_epi64 (__m512i a)
     167  {
     168    return _mm512_reduce_and_epi64 (a);
     169  }
     170  
     171  __attribute__((noinline, noclone)) long long
     172  test_reduce_or_epi64 (__m512i a)
     173  {
     174    return _mm512_reduce_or_epi64 (a);
     175  }
     176  
     177  __attribute__((noinline, noclone)) long long
     178  test_mask_reduce_add_epi64 (__mmask8 u, __m512i a)
     179  {
     180    return _mm512_mask_reduce_add_epi64 (u, a);
     181  }
     182  
     183  __attribute__((noinline, noclone)) long long
     184  test_mask_reduce_mul_epi64 (__mmask8 u, __m512i a)
     185  {
     186    return _mm512_mask_reduce_mul_epi64 (u, a);
     187  }
     188  
     189  __attribute__((noinline, noclone)) long long
     190  test_mask_reduce_and_epi64 (__mmask8 u, __m512i a)
     191  {
     192    return _mm512_mask_reduce_and_epi64 (u, a);
     193  }
     194  
     195  __attribute__((noinline, noclone)) long long
     196  test_mask_reduce_or_epi64 (__mmask8 u, __m512i a)
     197  {
     198    return _mm512_mask_reduce_or_epi64 (u, a);
     199  }
     200  
     201  __attribute__((noinline, noclone)) long long
     202  test_reduce_min_epi64 (__m512i a)
     203  {
     204    return _mm512_reduce_min_epi64 (a);
     205  }
     206  
     207  __attribute__((noinline, noclone)) long long
     208  test_reduce_max_epi64 (__m512i a)
     209  {
     210    return _mm512_reduce_max_epi64 (a);
     211  }
     212  
     213  __attribute__((noinline, noclone)) unsigned long long
     214  test_reduce_min_epu64 (__m512i a)
     215  {
     216    return _mm512_reduce_min_epu64 (a);
     217  }
     218  
     219  __attribute__((noinline, noclone)) unsigned long long
     220  test_reduce_max_epu64 (__m512i a)
     221  {
     222    return _mm512_reduce_max_epu64 (a);
     223  }
     224  
     225  __attribute__((noinline, noclone)) long long
     226  test_mask_reduce_min_epi64 (__mmask8 u, __m512i a)
     227  {
     228    return _mm512_mask_reduce_min_epi64 (u, a);
     229  }
     230  
     231  __attribute__((noinline, noclone)) long long
     232  test_mask_reduce_max_epi64 (__mmask8 u, __m512i a)
     233  {
     234    return _mm512_mask_reduce_max_epi64 (u, a);
     235  }
     236  
     237  __attribute__((noinline, noclone)) unsigned long long
     238  test_mask_reduce_min_epu64 (__mmask8 u, __m512i a)
     239  {
     240    return _mm512_mask_reduce_min_epu64 (u, a);
     241  }
     242  
     243  __attribute__((noinline, noclone)) unsigned long long
     244  test_mask_reduce_max_epu64 (__mmask8 u, __m512i a)
     245  {
     246    return _mm512_mask_reduce_max_epu64 (u, a);
     247  }
     248  
     249  __attribute__((noinline, noclone)) double
     250  test_reduce_add_pd (__m512d a)
     251  {
     252    return _mm512_reduce_add_pd (a);
     253  }
     254  
     255  __attribute__((noinline, noclone)) double
     256  test_reduce_mul_pd (__m512d a)
     257  {
     258    return _mm512_reduce_mul_pd (a);
     259  }
     260  
     261  __attribute__((noinline, noclone)) double
     262  test_mask_reduce_add_pd (__mmask8 u, __m512d a)
     263  {
     264    return _mm512_mask_reduce_add_pd (u, a);
     265  }
     266  
     267  __attribute__((noinline, noclone)) double
     268  test_mask_reduce_mul_pd (__mmask8 u, __m512d a)
     269  {
     270    return _mm512_mask_reduce_mul_pd (u, a);
     271  }
     272  
     273  __attribute__((noinline, noclone)) double
     274  test_reduce_min_pd (__m512d a)
     275  {
     276    return _mm512_reduce_min_pd (a);
     277  }
     278  
     279  __attribute__((noinline, noclone)) double
     280  test_reduce_max_pd (__m512d a)
     281  {
     282    return _mm512_reduce_max_pd (a);
     283  }
     284  
     285  __attribute__((noinline, noclone)) double
     286  test_mask_reduce_min_pd (__mmask8 u, __m512d a)
     287  {
     288    return _mm512_mask_reduce_min_pd (u, a);
     289  }
     290  
     291  __attribute__((noinline, noclone)) double
     292  test_mask_reduce_max_pd (__mmask8 u, __m512d a)
     293  {
     294    return _mm512_mask_reduce_max_pd (u, a);
     295  }
     296  
     297  #define TESTOP(opname, op, type, suffix, neutral) \
     298    do {									\
     299      type r1 = _mm512_reduce_##opname##_##suffix (v.x);			\
     300      type r2 = test_reduce_##opname##_##suffix (v.x);			\
     301      type r3 = neutral;							\
     302      if (r1 != r2)							\
     303        __builtin_abort ();						\
     304      for (int i = 0; i < SIZE; i++)					\
     305        r3 = r3 op v.a[i];						\
     306      if (r1 != r3)							\
     307        __builtin_abort ();						\
     308      type r4 = _mm512_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x);	\
     309      type r5 = test_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x);	\
     310      if (r4 != r5)							\
     311        __builtin_abort ();						\
     312      r3 = neutral;							\
     313      for (int i = 0; i < SIZE; i++)					\
     314        if (MASK_VALUE & (1 << i))					\
     315  	r3 = r3 op v.a[i];						\
     316      if (r4 != r3)							\
     317        __builtin_abort ();						\
     318      type r6 = _mm512_mask_reduce_##opname##_##suffix (0, v.x);		\
     319      type r7 = test_mask_reduce_##opname##_##suffix (0, v.x);		\
     320      if (r6 != r7 || r6 != neutral)					\
     321        __builtin_abort ();						\
     322    } while (0)
     323  
     324  #define SIZE (AVX512F_LEN / 32)
     325  #include "avx512f-mask-type.h"
     326  
     327  #define TEST_EPI32(c1, c2, c3, c4, c5, c6, c7, c8, \
     328  		   c9, c10, c11, c12, c13, c14, c15, c16)		\
     329    do {									\
     330      UNION_TYPE (AVX512F_LEN, i_d) v;					\
     331      v.x = _mm512_set_epi32 (c1, c2, c3, c4, c5, c6, c7, c8,		\
     332  			    c9, c10, c11, c12, c13, c14, c15, c16);	\
     333      TESTOP (add, +, int, epi32, 0);					\
     334      TESTOP (mul, *, int, epi32, 1);					\
     335      TESTOP (and, &, int, epi32, ~0);					\
     336      TESTOP (or, |, int, epi32, 0);					\
     337      TESTOP (min, < v.a[i] ? r3 :, int, epi32, __INT_MAX__);		\
     338      TESTOP (max, > v.a[i] ? r3 :, int, epi32, -__INT_MAX__ - 1);	\
     339      TESTOP (min, < (unsigned) v.a[i] ? r3 :, unsigned, epu32, ~0U);	\
     340      TESTOP (max, > (unsigned) v.a[i] ? r3 :, unsigned, epu32, 0);	\
     341    } while (0)
     342  
     343  #define TEST_PS(c1, c2, c3, c4, c5, c6, c7, c8, \
     344  		c9, c10, c11, c12, c13, c14, c15, c16)			\
     345    do {									\
     346      UNION_TYPE (AVX512F_LEN, ) v;					\
     347      v.x = _mm512_set_ps (c1, c2, c3, c4, c5, c6, c7, c8,		\
     348  			 c9, c10, c11, c12, c13, c14, c15, c16);	\
     349      TESTOP (add, +, float, ps, 0.0f);					\
     350      TESTOP (mul, *, float, ps, 1.0f);					\
     351      TESTOP (min, < v.a[i] ? r3 :, float, ps, __builtin_inff ());	\
     352      TESTOP (max, > v.a[i] ? r3 :, float, ps, -__builtin_inff ());	\
     353    } while (0)
     354  
     355  static void
     356  test_epi32_ps (void)
     357  {
     358    TEST_EPI32 (1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4);
     359    TEST_EPI32 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6);
     360    TEST_PS (1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4);
     361    TEST_PS (1.25f, 2.25f, -0.25f, 4.0f, -2.0f, 4.0f, -3.0f, 2.0f,
     362             -0.5f, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 4.0f);
     363  }
     364  
     365  #undef SIZE
     366  #define SIZE (AVX512F_LEN / 64)
     367  #include "avx512f-mask-type.h"
     368  
     369  #define TEST_EPI64(c1, c2, c3, c4, c5, c6, c7, c8) \
     370    do {									\
     371      UNION_TYPE (AVX512F_LEN, i_q) v;					\
     372      v.x = _mm512_set_epi64 (c1, c2, c3, c4, c5, c6, c7, c8);		\
     373      TESTOP (add, +, long long, epi64, 0);				\
     374      TESTOP (mul, *, long long, epi64, 1);				\
     375      TESTOP (and, &, long long, epi64, ~0LL);				\
     376      TESTOP (or, |, long long, epi64, 0);				\
     377      TESTOP (min, < v.a[i] ? r3 :, long long, epi64, __LONG_LONG_MAX__);	\
     378      TESTOP (max, > v.a[i] ? r3 :, long long, epi64,			\
     379  	    -__LONG_LONG_MAX__ - 1);					\
     380      TESTOP (min, < (unsigned long long) v.a[i] ? r3 :,			\
     381  	    unsigned long long, epu64, ~0ULL);				\
     382      TESTOP (max, > (unsigned long long) v.a[i] ? r3 :,			\
     383  	    unsigned long long, epu64, 0);				\
     384    } while (0)
     385  
     386  #define TEST_PD(c1, c2, c3, c4, c5, c6, c7, c8) \
     387    do {									\
     388      UNION_TYPE (AVX512F_LEN, d) v;					\
     389      v.x = _mm512_set_pd (c1, c2, c3, c4, c5, c6, c7, c8);		\
     390      TESTOP (add, +, double, pd, 0.0);					\
     391      TESTOP (mul, *, double, pd, 1.0);					\
     392      TESTOP (min, < v.a[i] ? r3 :, double, pd, __builtin_inf ());	\
     393      TESTOP (max, > v.a[i] ? r3 :, double, pd, -__builtin_inf ());	\
     394    } while (0)
     395  
     396  static void
     397  test_epi64_pd (void)
     398  {
     399    TEST_EPI64 (1, 2, 3, 4, 5, 6, 6, 5);
     400    TEST_EPI64 (-1, 15, -1, 7, -1, 7, -1, -1);
     401    TEST_PD (1, 2, 3, 4, 5, 6, 6, 5);
     402    TEST_PD (1.25f, 2.25f, -0.25f, 4.0f, -2.0f, 4.0f, -3.0f, 2.0f);
     403  }
     404  
     405  void
     406  test_512 (void)
     407  {
     408    test_epi32_ps ();
     409    test_epi64_pd ();
     410  }