1  /* { dg-do run { target avx512fp16 } } */
       2  /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
       3  
       4  static void do_test (void);
       5  
       6  #define DO_TEST do_test
       7  #define AVX512FP16
       8  
       9  #include <immintrin.h>
      10  #include "avx512-check.h"
      11  
      12  __m256h a1 = { -39.3f16, -180.9f16, 13.4f16, 35.4f16, -41.1f16, -14.4f16, 24.5f16, 53.54f16,
      13  	       238.4f16, -134.8f16, 24.5f16, 35.6f16, -346.7f16, -43.4f16, -535.3f16, 324.7f16 };
      14  __m256h a2 = { 82.5f16, 21.4f16, 24.4f16, 53.4f16, 23.5f16, -24.4f16, -34.5f16, -32.5f16,
      15  	       23.6f16, -13.4f16, 24.5f16, 35.5f16, -34.4f16, -24.5f16, -34.5f16, 13.5f16 };
      16  
      17  __m128h b1 = { 1.25f16, 2.25f16, -0.25f16, 4.0f16, -2.0f16, 4.0f16, -3.0f16, 2.0f16 };
      18  __m128h b2 = { -0.5f16, -1.0f16, 1.0f16, -1.0f16, 1.0f16, 1.0f16, 2.0f16, 4.0f16 };
      19  __m128h b3 = { 1.25f16, 2.25f16, -4.25f16, 4.0f16, -2.4f16, 4.0f16, -3.0f, 2.0f16 };
      20  __m128h b4 = { -4.5f16, 7.6f16, 0.7f16, -8.2f16, 2.1f16, 2.4f16, -2.0f16, 1.4f16 };
      21  
      22  __attribute__((noinline, noclone)) _Float16
      23  test_reduce_256_add_ph (__m256h a)
      24  {
      25    return _mm256_reduce_add_ph (a);
      26  }
      27  
      28  __attribute__((noinline, noclone)) _Float16
      29  test_reduce_256_mul_ph (__m256h a)
      30  {
      31    return _mm256_reduce_mul_ph (a);
      32  }
      33  
      34  __attribute__((noinline, noclone)) _Float16
      35  test_reduce_256_max_ph (__m256h a)
      36  {
      37    return _mm256_reduce_max_ph (a);
      38  }
      39  
      40  __attribute__((noinline, noclone)) _Float16
      41  test_reduce_256_min_ph (__m256h a)
      42  {
      43    return _mm256_reduce_min_ph (a);
      44  }
      45  
      46  __attribute__((noinline, noclone)) _Float16
      47  test_reduce_add_ph (__m128h b)
      48  {
      49    return _mm_reduce_add_ph (b);
      50  }
      51  
      52  __attribute__((noinline, noclone)) _Float16
      53  test_reduce_mul_ph (__m128h b)
      54  {
      55    return _mm_reduce_mul_ph (b);
      56  }
      57  
      58  __attribute__((noinline, noclone)) _Float16
      59  test_reduce_max_ph (__m128h b)
      60  {
      61    return _mm_reduce_max_ph (b);
      62  }
      63  
      64  __attribute__((noinline, noclone)) _Float16
      65  test_reduce_min_ph (__m128h b)
      66  {
      67    return _mm_reduce_min_ph (b);
      68  }
      69  
      70  #define SIZE 16
      71  #define REF_ADDMUL(op, a)				\
      72    __m128h __a1 = _mm_setzero_ph ();			\
      73    for (int i = 0; i < 8; i++) {				\
      74      __a1[i] = (_Float16) a[i] op (_Float16) a[i + 8];	\
      75    }							\
      76    _Float16 __c0 = __a1[0] op __a1[4];			\
      77    _Float16 __c1 = __a1[1] op __a1[5];			\
      78    _Float16 __c2 = __a1[2] op __a1[6];			\
      79    _Float16 __c3 = __a1[3] op __a1[7];			\
      80    _Float16 __d0 = __c0 op __c2;				\
      81    _Float16 __d1 = __c1 op __c3;				\
      82    _Float16 __e0 = __d0 op __d1;				\
      83    r3 = __e0
      84  
      85  #define TESTOP(opname, op, a)				\
      86    do {							\
      87      _Float16 r1 = _mm256_reduce_##opname##_ph (a);	\
      88      _Float16 r2 = test_reduce_256_##opname##_ph (a);	\
      89      _Float16 r3 = a[0];					\
      90      if (r1 != r2) {					\
      91        __builtin_abort ();				\
      92      }							\
      93      REF_ADDMUL (op, a);					\
      94      if (r1 != r3) {					\
      95        __builtin_abort ();				\
      96      }							\
      97    } while (0)
      98  
      99  #define TEST_ADDMUL_PH(a)			\
     100    do {						\
     101      TESTOP (add, +, a);				\
     102      TESTOP (mul, *, a);				\
     103    } while (0)
     104  
     105  static void
     106  test_256_addmul_ph (void)
     107  {
     108    TEST_ADDMUL_PH (a1);
     109    TEST_ADDMUL_PH (a2);
     110  }
     111  
     112  #undef TESTOP
     113  #define TESTOP(opname, op, a)				\
     114    do {							\
     115      _Float16 r1 = _mm256_reduce_##opname##_ph (a);	\
     116      _Float16 r2 = test_reduce_256_##opname##_ph (a);	\
     117      _Float16 r3 = a[0];					\
     118      if (r1 != r2) {					\
     119        __builtin_abort ();				\
     120      }							\
     121      for (int i = 1; i < SIZE; i++)			\
     122        r3 = r3 op a[i];					\
     123      if (r1 != r3) {					\
     124        __builtin_abort ();				\
     125      }							\
     126    } while (0)
     127  
     128  #define TEST_MINMAX_PH(a)			\
     129    do {						\
     130      TESTOP (min, < a[i] ? r3 :, a);		\
     131      TESTOP (max, > a[i] ? r3 :, a);		\
     132    } while (0)
     133  
     134  static void
     135  test_256_minmax_ph (void)
     136  {
     137    TEST_MINMAX_PH (a1);
     138    TEST_MINMAX_PH (a2);
     139  }
     140  
     141  static void
     142  test_256_ph (void)
     143  {
     144     test_256_addmul_ph ();
     145     test_256_minmax_ph ();
     146  }
     147  
     148  #undef SIZE
     149  #define SIZE 8
     150  
     151  #undef REF_ADDMUL
     152  #define REF_ADDMUL(op, a)			\
     153    _Float16 __c0 = a[0] op a[4];			\
     154    _Float16 __c1 = a[1] op a[5];			\
     155    _Float16 __c2 = a[2] op a[6];			\
     156    _Float16 __c3 = a[3] op a[7];			\
     157    _Float16 __d0 = __c0 op __c2;			\
     158    _Float16 __d1 = __c1 op __c3;			\
     159    _Float16 __e0 = __d0 op __d1;			\
     160    r3 = __e0
     161  
     162  #undef TESTOP
     163  #define TESTOP(opname, op, a)				\
     164    do {							\
     165      _Float16 r1 = _mm_reduce_##opname##_ph (a);		\
     166      _Float16 r2 = test_reduce_##opname##_ph (a);	\
     167      _Float16 r3 = a[0];					\
     168      if (r1 != r2) {					\
     169        __builtin_abort ();				\
     170      }							\
     171      REF_ADDMUL (op, a);					\
     172      if (r1 != r3) {					\
     173        __builtin_abort ();				\
     174      }							\
     175    } while (0)
     176  
     177  #undef TEST_ADDMUL_PH
     178  #define TEST_ADDMUL_PH(a)			\
     179    do {						\
     180      TESTOP (add, +, a);				\
     181      TESTOP (mul, *, a);				\
     182    } while (0)
     183  
     184  static void
     185  test_128_addmul_ph (void)
     186  {
     187    TEST_ADDMUL_PH (b1);
     188    TEST_ADDMUL_PH (b2);
     189    TEST_ADDMUL_PH (b3);
     190    TEST_ADDMUL_PH (b4);
     191  }
     192  
     193  #undef TESTOP
     194  #define TESTOP(opname, op, b)				\
     195    do {							\
     196      _Float16 r1 = _mm_reduce_##opname##_ph (b);		\
     197      _Float16 r2 = test_reduce_##opname##_ph (b);	\
     198      _Float16 r3 = b[0];					\
     199      if (r1 != r2) {					\
     200        __builtin_abort ();				\
     201      }							\
     202      for (int i = 1; i < SIZE; i++)			\
     203        r3 = r3 op b[i];					\
     204      if (r1 != r3) {					\
     205        __builtin_abort ();				\
     206      }							\
     207    } while (0)
     208  
     209  #undef TEST_MINMAX_PH
     210  #define TEST_MINMAX_PH(b)			\
     211    do {						\
     212      TESTOP (min, < b[i] ? r3 :, b);		\
     213      TESTOP (max, > b[i] ? r3 :, b);		\
     214    } while (0)
     215  
     216  static void
     217  test_128_minmax_ph (void)
     218  {
     219    TEST_MINMAX_PH (b1);
     220    TEST_MINMAX_PH (b2);
     221    TEST_MINMAX_PH (b3);
     222    TEST_MINMAX_PH (b4);
     223  }
     224  
     225  static void
     226  test_128_ph (void)
     227  {
     228    test_128_addmul_ph ();
     229    test_128_minmax_ph ();
     230  }
     231  
     232  static void
     233  do_test (void)
     234  {
     235    test_256_ph ();
     236    test_128_ph ();
     237  }
     238  
     239  
     240  #undef SIZE
     241  #undef REF_ADDMUL
     242  #undef TESTOP
     243  #undef TEST_ADDMUL_PH
     244  #undef TEST_MINMAX_PH