(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
arm/
simd/
vdot-exec.c
       1  /* { dg-do run } */
       2  /* { dg-additional-options "-O3" } */
       3  /* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
       4  /* { dg-add-options arm_v8_2a_dotprod_neon }  */
       5  
       6  #include <arm_neon.h>
       7  
       8  extern void abort();
       9  
      10  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      11  # define ORDER(x, y) y
      12  #else
      13  # define ORDER(x, y) (x - y)
      14  #endif
      15  
      16  #define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
      17  #define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
      18  #define TEST(t1, t2, t3, f, r1, r2, n1, n2) \
      19  	ARR(f, x, t1, r1);		    \
      20  	ARR(f, y, t2, r2);		    \
      21  	t3 f##_##r = {0};		    \
      22  	f##_##r = f (f##_##r, f##_##x, f##_##y);  \
      23  	if (f##_##r[0] != n1 || f##_##r[1] != n2)   \
      24  	  abort ();
      25  
      26  #define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
      27  	ARR(f, x, t1, r1);		    \
      28  	ARR(f, y, t2, r2);		    \
      29  	t3 f##_##rx = {0};		    \
      30  	f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0));  \
      31  	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
      32  	  abort ();				    \
      33  	t3 f##_##rx1 = {0};			    \
      34  	f##_##rx1 =  f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1));  \
      35  	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
      36  	  abort ();
      37  
      38  #define P2(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2,n1,n1,n1,n1,n2,n2,n2,n2
      39  #define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
      40  	ARR(f, x, t1, r1);		    \
      41  	ARR(f, y, t2, r2);		    \
      42  	t3 f##_##rx = {0};		    \
      43  	f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 2));  \
      44  	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
      45  	  abort ();				    \
      46  	t3 f##_##rx1 = {0};			    \
      47  	f##_##rx1 =  f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 3));  \
      48  	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
      49  	  abort ();
      50  
      51  int
      52  main()
      53  {
      54    TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24);
      55    TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24);
      56  
      57    TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24);
      58    TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24);
      59  
      60    TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
      61    TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
      62  
      63    TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
      64    TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
      65  
      66    TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), P2(2,3), 8, 16, 12, 24);
      67    TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), P2(-2,-3), -8, -16, -12, -24);
      68  
      69    TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, P2(1,2), P2(2,3), 8, 16, 12, 24);
      70    TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, P2(1,2), P2(-2,-3), -8, -16, -12, -24);
      71  
      72    return 0;
      73  }