1  /* Test the vmulxq_laneq_f32 AArch64 SIMD intrinsic.  */
       2  
       3  /* { dg-do run } */
       4  /* { dg-options "-save-temps -O3" } */
       5  
       6  #include "arm_neon.h"
       7  
       8  extern void abort (void);
       9  
      10  float32x4_t __attribute__ ((noinline))
      11  test_vmulxq_laneq_f32_lane0 (float32x4_t vec1_1, float32x4_t vec1_2)
      12  {
      13    return vmulxq_laneq_f32 (vec1_1, vec1_2, 0);
      14  }
      15  
      16  float32x4_t __attribute__ ((noinline))
      17  test_vmulxq_laneq_f32_lane1 (float32x4_t vec1_1, float32x4_t vec1_2)
      18  {
      19    return vmulxq_laneq_f32 (vec1_1, vec1_2, 1);
      20  }
      21  
      22  float32x4_t __attribute__ ((noinline))
      23  test_vmulxq_laneq_f32_lane2 (float32x4_t vec1_1, float32x4_t vec1_2)
      24  {
      25    return vmulxq_laneq_f32 (vec1_1, vec1_2, 2);
      26  }
      27  
      28  float32x4_t __attribute__ ((noinline))
      29  test_vmulxq_laneq_f32_lane3 (float32x4_t vec1_1, float32x4_t vec1_2)
      30  {
      31    return vmulxq_laneq_f32 (vec1_1, vec1_2, 3);
      32  }
      33  
/* Wrap a comma-separated list in braces.  Used below so that a whole
   array initializer can be handed to SETUP_VEC as a single macro
   argument despite the commas it contains.  */
#define PASS_ARRAY(...) {__VA_ARGS__}
      35  
      36  #define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, EXP2, EXP3, I)		\
      37    void set_and_test_case##I ()						\
      38    {									\
      39      int i;								\
      40      float32_t vec1_data[] = V1_D;					\
      41      float32x4_t vec1 = vld1q_f32 (vec1_data);				\
      42      float32_t vec2_data[] =  V2_D;					\
      43      float32x4_t vec2 = vld1q_f32 (vec2_data);				\
      44  									\
      45      float32_t expected_lane0[] = EXP0;					\
      46      float32_t expected_lane1[] = EXP1;					\
      47      float32_t expected_lane2[] = EXP2;					\
      48      float32_t expected_lane3[] = EXP3;					\
      49  									\
      50      float32x4_t actual_lane0_v =					\
      51        test_vmulxq_laneq_f32_lane0 (vec1, vec2);				\
      52      float32_t actual_lane0[4];						\
      53      vst1q_f32 (actual_lane0, actual_lane0_v);				\
      54      for (i = 0; i < 4; ++i)						\
      55        if (actual_lane0[i] != expected_lane0[i])				\
      56  	abort ();							\
      57  									\
      58      float32x4_t actual_lane1_v =					\
      59        test_vmulxq_laneq_f32_lane1 (vec1, vec2);				\
      60      float32_t actual_lane1[4];						\
      61      vst1q_f32 (actual_lane1, actual_lane1_v);				\
      62      for (i = 0; i < 4; ++i)						\
      63        if (actual_lane1[i] != expected_lane1[i])				\
      64  	abort ();							\
      65  									\
      66      float32x4_t actual_lane2_v =					\
      67        test_vmulxq_laneq_f32_lane2 (vec1, vec2);				\
      68      float32_t actual_lane2[4];						\
      69      vst1q_f32 (actual_lane2, actual_lane2_v);				\
      70      for (i = 0; i < 4; ++i)						\
      71        if (actual_lane2[i] != expected_lane2[i])				\
      72  	abort ();							\
      73  									\
      74      float32x4_t actual_lane3_v =					\
      75        test_vmulxq_laneq_f32_lane3 (vec1, vec2);				\
      76      float32_t actual_lane3[4];						\
      77      vst1q_f32 (actual_lane3, actual_lane3_v);				\
      78      for (i = 0; i < 4; ++i)						\
      79        if (actual_lane3[i] != expected_lane3[i])				\
      80  	abort ();							\
      81    }									\
      82  
/* Ordinary finite inputs, two positive and two negative; used as the
   vector elements of test case 1.  */
float32_t v1 = 3.14159265359;
float32_t v2 = 1.383894;
float32_t v3 = -2.71828;
float32_t v4 = -3.4891931;

/* Special inputs for test case 2: positive and negative zero, and
   plus/minus __builtin_huge_valf ().  */
float32_t v5 = 0.0;
float32_t v6 = -0.0;
float32_t v7 = __builtin_huge_valf ();
float32_t v8 = -__builtin_huge_valf ();

/* Expected results for test case 2 where both operands are one of
   v7/v8: the product of two huge values, with positive (spec) or
   negative (spec_n) sign.  */
float32_t spec = __builtin_huge_valf () * __builtin_huge_valf ();
float32_t spec_n = -__builtin_huge_valf () * __builtin_huge_valf ();
      95  
/* Test case 1: both vectors are {v1, v2, v3, v4}.  The Nth expected
   array is every element multiplied by element N, written as ordinary
   scalar products.  */
SETUP_VEC (PASS_ARRAY (v1, v2, v3, v4), PASS_ARRAY (v1, v2, v3, v4),
	   PASS_ARRAY (v1*v1, v1*v2, v1*v3, v1*v4),
	   PASS_ARRAY (v1*v2, v2*v2, v2*v3, v2*v4),
	   PASS_ARRAY (v1*v3, v2*v3, v3*v3, v4*v3),
	   PASS_ARRAY (v1*v4, v2*v4, v3*v4, v4*v4), 1)
     101  
/* Test case 2: both vectors are {v5, v6, v7, v8}, i.e. the zeros and
   huge values defined above.  The expected arrays list 2.0 or -2.0
   wherever a zero is paired with a huge value, signed zeros for
   zero-by-zero, and spec / spec_n for huge-by-huge.  */
SETUP_VEC (PASS_ARRAY (v5, v6, v7, v8), PASS_ARRAY (v5, v6, v7, v8),
	   PASS_ARRAY (0.0, -0.0, 2.0, -2.0),
	   PASS_ARRAY (-0.0, 0.0, -2.0, 2.0),
	   PASS_ARRAY (2.0, -2.0, spec, spec_n),
	   PASS_ARRAY (-2.0, 2.0, spec_n, spec), 2)
     107  
/* Run both generated test cases.  Each one calls abort () on the first
   mismatch, so reaching the return means every comparison passed.  */
int
main (void)
{
  set_and_test_case1 ();
  set_and_test_case2 ();
  return 0;
}
     115  /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
     116  /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
     117  /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */
     118  /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */