1  /* { dg-do run } */
       2  /* { dg-options "-O3 --save-temps" } */
       3  
       4  #include <arm_neon.h>
       5  #include <math.h>
       6  #include <stdlib.h>
       7  
       8  int
       9  test_frecps_float32_t (void)
      10  {
      11    int i;
      12    float32_t value = 0.2;
      13    float32_t reciprocal = 5.0;
      14    float32_t step = vrecpes_f32 (value);
      15    /* 3 steps should give us within ~0.001 accuracy.  */
      16    for (i = 0; i < 3; i++)
      17      step = step * vrecpss_f32 (step, value);
      18  
      19    return fabs (step - reciprocal) < 0.001;
      20  }
      21  
      22  /* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
      23  /* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
      24  
      25  int
      26  test_frecps_float32x2_t (void)
      27  {
      28    int i;
      29    int ret = 1;
      30  
      31    const float32_t value_pool[] = {0.2, 0.4};
      32    const float32_t reciprocal_pool[] = {5.0, 2.5};
      33    float32x2_t value = vld1_f32 (value_pool);
      34    float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
      35  
      36    float32x2_t step = vrecpe_f32 (value);
      37    /* 3 steps should give us within ~0.001 accuracy.  */
      38    for (i = 0; i < 3; i++)
      39      step = step * vrecps_f32 (step, value);
      40  
      41    ret &= fabs (vget_lane_f32 (step, 0)
      42  	       - vget_lane_f32 (reciprocal, 0)) < 0.001;
      43    ret &= fabs (vget_lane_f32 (step, 1)
      44  	       - vget_lane_f32 (reciprocal, 1)) < 0.001;
      45  
      46    return ret;
      47  }
      48  
      49  /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
      50  /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
      51  
      52  int
      53  test_frecps_float32x4_t (void)
      54  {
      55    int i;
      56    int ret = 1;
      57  
      58    const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
      59    const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
      60    float32x4_t value = vld1q_f32 (value_pool);
      61    float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
      62  
      63    float32x4_t step = vrecpeq_f32 (value);
      64    /* 3 steps should give us within ~0.001 accuracy.  */
      65    for (i = 0; i < 3; i++)
      66      step = step * vrecpsq_f32 (step, value);
      67  
      68    ret &= fabs (vgetq_lane_f32 (step, 0)
      69  	       - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
      70    ret &= fabs (vgetq_lane_f32 (step, 1)
      71  	       - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
      72    ret &= fabs (vgetq_lane_f32 (step, 2)
      73  	       - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
      74    ret &= fabs (vgetq_lane_f32 (step, 3)
      75  	       - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
      76  
      77    return ret;
      78  }
      79  
      80  /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
      81  /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
      82  
      83  int
      84  test_frecps_float64_t (void)
      85  {
      86    int i;
      87    float64_t value = 0.2;
      88    float64_t reciprocal = 5.0;
      89    float64_t step = vrecped_f64 (value);
      90    /* 3 steps should give us within ~0.001 accuracy.  */
      91    for (i = 0; i < 3; i++)
      92      step = step * vrecpsd_f64 (step, value);
      93  
      94    return fabs (step - reciprocal) < 0.001;
      95  }
      96  
      97  /* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
      98  /* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
      99  
     100  int
     101  test_frecps_float64x2_t (void)
     102  {
     103    int i;
     104    int ret = 1;
     105  
     106    const float64_t value_pool[] = {0.2, 0.4};
     107    const float64_t reciprocal_pool[] = {5.0, 2.5};
     108    float64x2_t value = vld1q_f64 (value_pool);
     109    float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
     110  
     111    float64x2_t step = vrecpeq_f64 (value);
     112    /* 3 steps should give us within ~0.001 accuracy.  */
     113    for (i = 0; i < 3; i++)
     114      step = step * vrecpsq_f64 (step, value);
     115  
     116    ret &= fabs (vgetq_lane_f64 (step, 0)
     117  	       - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
     118    ret &= fabs (vgetq_lane_f64 (step, 1)
     119  	       - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
     120  
     121    return ret;
     122  }
     123  
     124  /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
     125  /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
     126  
     127  int
     128  main (int argc, char **argv)
     129  {
     130    if (!test_frecps_float32_t ())
     131      abort ();
     132    if (!test_frecps_float32x2_t ())
     133      abort ();
     134    if (!test_frecps_float32x4_t ())
     135      abort ();
     136    if (!test_frecps_float64_t ())
     137      abort ();
     138    if (!test_frecps_float64x2_t ())
     139      abort ();
     140  
     141    return 0;
     142  }
     143