1  /* { dg-do run } */
       2  /* { dg-options "-O3 --save-temps -ffast-math" } */
       3  
       4  #pragma GCC target "+nosve"
       5  
       6  #include <arm_neon.h>
       7  
       8  extern void abort (void);
       9  extern float fabsf (float);
      10  extern double fabs (double);
      11  
      12  #define NUM_TESTS 16
      13  #define DELTA 0.000001
      14  
      15  int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
      16  		       -4, 34, 110, -110, 6, 4, 75, -34};
      17  int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
      18  			 -4, 34, 110, -110, 6, 4, 75, -34};
      19  int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
      20  			 -4, 34, 110, -110, 6, 4, 75, -34};
      21  int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
      22  			 -4, 34, 110, -110, 6, 4, 75, -34};
      23  
      24  uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
      25  			 4, 34, 110, 110, 6, 4, 75, 34};
      26  uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
      27  			   4, 34, 110, 110, 6, 4, 75, 34};
      28  uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
      29  			   4, 34, 110, 110, 6, 4, 75, 34};
      30  
      31  uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
      32  			   4, 34, 110, 110, 6, 4, 75, 34};
      33  
      34  float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
      35  			 200.0f, -800.0f, -13.0f, -0.5f,
      36  			 7.9f, -870.0f, 10.4f, 310.11f,
      37  			 0.0f, -865.0f, -2213.0f, -1.5f};
      38  
      39  double input_float64[] = {0.1, -0.1, 0.4, 10.3,
      40  			  200.0, -800.0, -13.0, -0.5,
      41  			  7.9, -870.0, 10.4, 310.11,
      42  			  0.0, -865.0, -2213.0, -1.5};
      43  
      44  #define EQUALF(a, b) (fabsf (a - b) < DELTA)
      45  #define EQUALD(a, b) (fabs (a - b) < DELTA)
      46  #define EQUALL(a, b) (a == b)
      47  
      48  #define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)				\
      49  int									\
      50  test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void)			\
      51  {									\
      52    int i, j;								\
      53    int moves = (NUM_TESTS - LANES) + 1;					\
      54    TYPE##_t out_l[NUM_TESTS];						\
      55    TYPE##_t out_v[NUM_TESTS];						\
      56  									\
      57    /* Calculate linearly.  */						\
      58    for (i = 0; i < moves; i++)						\
      59      {									\
      60        asm ("" : "=r" (out_l[i]) : "0" (0));				\
      61        for (j = 0; j < LANES; j++)					\
      62  	out_l[i] += input_##TYPE[i + j];				\
      63      }									\
      64  									\
      65    /* Calculate using vector reduction intrinsics.  */			\
      66    for (i = 0; i < moves; i++)						\
      67      {									\
      68        TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i);	\
      69        out_v[i] = vaddv##Q##_##SUFFIX (t1);				\
      70      }									\
      71  									\
      72    /* Compare.  */							\
      73    for (i = 0; i < moves; i++)						\
      74      {									\
      75        if (!EQUAL##FLOAT (out_v[i], out_l[i]))				\
      76  	return 0;							\
      77      }									\
      78    return 1;								\
      79  }
      80  
      81  #define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F)	\
      82  TEST (STYPE,  , TYPE, W32, F)				\
      83  TEST (STYPE, q, TYPE, W64, F)				\
      84  
      85  BUILD_VARIANTS (int8, s8, 8, 16, L)
      86  BUILD_VARIANTS (uint8, u8, 8, 16, L)
      87  /* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
      88  /* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
      89  BUILD_VARIANTS (int16, s16, 4, 8, L)
      90  BUILD_VARIANTS (uint16, u16, 4, 8, L)
      91  /* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
      92  /* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
      93  BUILD_VARIANTS (int32, s32, 2, 4, L)
      94  BUILD_VARIANTS (uint32, u32, 2, 4, L)
      95  /* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
      96  /* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
      97  TEST (s64, q, int64, 2, D)
      98  TEST (u64, q, uint64, 2, D)
      99  /* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
     100  
     101  BUILD_VARIANTS (float32, f32, 2, 4, F)
     102  /* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
     103  /* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
     104  TEST (f64, q, float64, 2, D)
     105  /* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
     106  
     107  #undef TEST
     108  #define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)		\
     109  {							\
     110    if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ())	\
     111      abort ();						\
     112  }
     113  
     114  int
     115  main (int argc, char **argv)
     116  {
     117  BUILD_VARIANTS (int8, s8, 8, 16, L)
     118  BUILD_VARIANTS (uint8, u8, 8, 16, L)
     119  BUILD_VARIANTS (int16, s16, 4, 8, L)
     120  BUILD_VARIANTS (uint16, u16, 4, 8, L)
     121  BUILD_VARIANTS (int32, s32, 2, 4, L)
     122  BUILD_VARIANTS (uint32, u32, 2, 4, L)
     123  
     124  BUILD_VARIANTS (float32, f32, 2, 4, F)
     125  TEST (f64, q, float64, 2, D)
     126  
     127    return 0;
     128  }
     129