1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  #include <math.h>
       5  
       6  /* Expected results for the basic test in exec_vrsqrts, given as raw bit
             patterns and handed to CHECK_FP.  The inputs of that test are
             12.9 and 9.9 for the 16x4 and 32x2 variants, and 9.1 and 1.9 for
             the 16x8 and 32x4 variants.
             NOTE(review): read as IEEE 754 binary16/binary32 these patterns are
             about -62.35 and -7.14, which matches (3 - a * b) / 2 -- confirm
             against the VRSQRTS definition.  */
       7  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
       8  VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xd3cb, 0xd3cb, 0xd3cb, 0xd3cb };
       9  VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xc726, 0xc726, 0xc726, 0xc726,
      10  					      0xc726, 0xc726, 0xc726, 0xc726 };
      11  #endif
      12  VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 };
      13  VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8,
      14  					   0xc0e4a3d8, 0xc0e4a3d8 };
      15  
      16  /* Expected results with input=NaN.  In exec_vrsqrts the NaN is written
             to "vector" for the 16x4 and 32x2 variants and to "vector2" for the
             16x8 and 32x4 variants; the other input is not set again for that
             test.  Read as IEEE 754 binary16/binary32, 0x7e00 and 0x7fc00000
             have every exponent bit set and only the top fraction bit set,
             i.e. a quiet NaN.  */
      17  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      18  VECT_VAR_DECL(expected_nan, hfloat, 16, 4) [] = { 0x7e00, 0x7e00,
      19  						  0x7e00, 0x7e00 };
      20  VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
      21  						  0x7e00, 0x7e00,
      22  						  0x7e00, 0x7e00,
      23  						  0x7e00, 0x7e00 };
      24  #endif
      25  VECT_VAR_DECL(expected_nan,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 };
      26  VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
      27  					       0x7fc00000, 0x7fc00000 };
      28  
      29  /* Expected results with FP special input values (infinity, 0) mixed
             with normal values.  In exec_vrsqrts the 16x4 and 32x2 variants get
             HUGE_VALF in "vector" while "vector2" is not set again for that
             test; the 16x8 and 32x4 variants get 0.0 in "vector" and 3.2 in
             "vector2".  Read as IEEE 754 binary16/binary32, 0xfc00 and
             0xff800000 encode -infinity, 0x3e00 and 0x3fc00000 encode 1.5.  */
      30  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      31  VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0xfc00, 0xfc00,
      32  						  0xfc00, 0xfc00 };
      33  VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x3e00, 0x3e00,
      34  						  0x3e00, 0x3e00,
      35  						  0x3e00, 0x3e00,
      36  						  0x3e00, 0x3e00 };
      37  #endif
      38  VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0xff800000, 0xff800000 };
      39  VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000,
      40  					       0x3fc00000, 0x3fc00000 };
      41  
      42  /* Expected results with only FP special input values (infinity,
      43     0).  In exec_vrsqrts the 16x4 and 32x2 variants use HUGE_VALF and
             -0.0, the 16x8 and 32x4 variants use 0.0 and HUGE_VALF.  Every lane
             expects 0x3e00 or 0x3fc00000, which read as IEEE 754
             binary16/binary32 encode 1.5.  */
      44  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      45  VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x3e00, 0x3e00,
      46  						  0x3e00, 0x3e00 };
      47  VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x3e00, 0x3e00,
      48  						  0x3e00, 0x3e00,
      49  						  0x3e00, 0x3e00,
      50  						  0x3e00, 0x3e00 };
      51  #endif
      52  VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 };
      53  VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000,
      54  					       0x3fc00000, 0x3fc00000 };
      55  
      56  #define TEST_MSG "VRSQRTS/VRSQRTSQ"
          /* Exercise vrsqrts_f32 and vrsqrtsq_f32, plus vrsqrts_f16 and
             vrsqrtsq_f16 when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.

             The function runs four tests in sequence: arbitrary values, NaN in
             one input, infinity/0 mixed with normal values, and only
             infinity/0.  Each test sets inputs with VDUP, applies TEST_VRSQRTS
             and passes the matching expected* table to CHECK_FP.  Not every
             input is set again in every test, so the order of the tests
             matters; see the comment at the start of each test.  */
      57  void exec_vrsqrts(void)
      58  {
      59    int i;  /* NOTE(review): not referenced directly in this function; it may be unused or used inside the CHECK_FP expansion -- confirm.  */
      60  
      61    /* Basic test: vector_res = vrsqrts(vector, vector2), then store vector_res into the result buffer.  */
      62  #define TEST_VRSQRTS(Q, T1, T2, W, N)			\
      63    VECT_VAR(vector_res, T1, W, N) =			\
      64      vrsqrts##Q##_##T2##W(VECT_VAR(vector, T1, W, N),	\
      65  			 VECT_VAR(vector2, T1, W, N));	\
      66    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),		\
      67  		    VECT_VAR(vector_res, T1, W, N))
      68  
      69    /* No need for integer variants.  */
            /* First operand.  */
      70  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      71    DECL_VARIABLE(vector, float, 16, 4);
      72    DECL_VARIABLE(vector, float, 16, 8);
      73  #endif
      74    DECL_VARIABLE(vector, float, 32, 2);
      75    DECL_VARIABLE(vector, float, 32, 4);
      76  
            /* Second operand.  */
      77  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      78    DECL_VARIABLE(vector2, float, 16, 4);
      79    DECL_VARIABLE(vector2, float, 16, 8);
      80  #endif
      81    DECL_VARIABLE(vector2, float, 32, 2);
      82    DECL_VARIABLE(vector2, float, 32, 4);
      83  
            /* Value returned by the intrinsic, before it is stored.  */
      84  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      85    DECL_VARIABLE(vector_res, float, 16, 4);
      86    DECL_VARIABLE(vector_res, float, 16, 8);
      87  #endif
      88    DECL_VARIABLE(vector_res, float, 32, 2);
      89    DECL_VARIABLE(vector_res, float, 32, 4);
      90  
      91    clean_results ();
      92  
      93    /* Choose init value arbitrarily.  */
      94  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      95    VDUP(vector, , float, f, 16, 4, 12.9f);
      96    VDUP(vector, q, float, f, 16, 8, 9.1f);
      97  #endif
      98    VDUP(vector, , float, f, 32, 2, 12.9f);
      99    VDUP(vector, q, float, f, 32, 4, 9.1f);
     100  
     101  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     102    VDUP(vector2, , float, f, 16, 4, 9.9f);
     103    VDUP(vector2, q, float, f, 16, 8, 1.9f);
     104  #endif
     105    VDUP(vector2, , float, f, 32, 2, 9.9f);
     106    VDUP(vector2, q, float, f, 32, 4, 1.9f);
     107  
     108    /* Apply the operator.  */
     109  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     110    TEST_VRSQRTS(, float, f, 16, 4);
     111    TEST_VRSQRTS(q, float, f, 16, 8);
     112  #endif
     113    TEST_VRSQRTS(, float, f, 32, 2);
     114    TEST_VRSQRTS(q, float, f, 32, 4);
     115  
            /* CMT is the comment string passed to CHECK_FP; it is redefined
               before each group of checks below.  */
     116  #define CMT ""
     117  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     118    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT);
     119    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT);
     120  #endif
     121    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT);
     122    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT);
     123  
     124  
     125    /* Test FP variants with special input values (NaN).  Only one input
               per variant is overwritten: "vector" for the 16x4 and 32x2
               variants, "vector2" for the 16x8 and 32x4 variants.  The other
               input is not set again after the basic test.  */
     126  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     127    VDUP(vector, , float, f, 16, 4, NAN);
     128    VDUP(vector2, q, float, f, 16, 8, NAN);
     129  #endif
     130    VDUP(vector, , float, f, 32, 2, NAN);
     131    VDUP(vector2, q, float, f, 32, 4, NAN);
     132  
     133    /* Apply the operator.  */
     134  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     135    TEST_VRSQRTS(, float, f, 16, 4);
     136    TEST_VRSQRTS(q, float, f, 16, 8);
     137  #endif
     138    TEST_VRSQRTS(, float, f, 32, 2);
     139    TEST_VRSQRTS(q, float, f, 32, 4);
     140  
     141  #undef CMT
     142  #define CMT " FP special (NAN) and normal values"
     143  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     144    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_nan, CMT);
     145    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, CMT);
     146  #endif
     147    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_nan, CMT);
     148    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, CMT);
     149  
     150  
     151    /* Test FP variants with special input values (infinity, 0).  "vector"
               is overwritten for every variant; "vector2" is only set again for
               the 16x8 and 32x4 variants.  */
     152  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     153    VDUP(vector, , float, f, 16, 4, HUGE_VALF);
     154    VDUP(vector, q, float, f, 16, 8, 0.0f);
     155    /* Restore a normal value in vector2 (the NaN test above set it to NAN).  */
     156    VDUP(vector2, q, float, f, 16, 8, 3.2f);
     157  #endif
     158    VDUP(vector, , float, f, 32, 2, HUGE_VALF);
     159    VDUP(vector, q, float, f, 32, 4, 0.0f);
     160    /* Restore a normal value in vector2 (the NaN test above set it to NAN).  */
     161    VDUP(vector2, q, float, f, 32, 4, 3.2f);
     162  
     163    /* Apply the operator.  */
     164  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     165    TEST_VRSQRTS(, float, f, 16, 4);
     166    TEST_VRSQRTS(q, float, f, 16, 8);
     167  #endif
     168    TEST_VRSQRTS(, float, f, 32, 2);
     169    TEST_VRSQRTS(q, float, f, 32, 4);
     170  
     171  #undef CMT
     172  #define CMT " FP special (infinity, 0) and normal values"
     173  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     174    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT);
     175    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT);
     176  #endif
     177    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT);
     178    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT);
     179  
     180  
     181    /* Test FP variants with only special input values (infinity, 0).
               Both "vector" and "vector2" are set for every variant here.  */
     182  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     183    VDUP(vector, , float, f, 16, 4, HUGE_VALF);
     184    VDUP(vector, q, float, f, 16, 8, 0.0f);
     185    VDUP(vector2, , float, f, 16, 4, -0.0f);
     186    VDUP(vector2, q, float, f, 16, 8, HUGE_VALF);
     187  #endif
     188    VDUP(vector, , float, f, 32, 2, HUGE_VALF);
     189    VDUP(vector, q, float, f, 32, 4, 0.0f);
     190    VDUP(vector2, , float, f, 32, 2, -0.0f);
     191    VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
     192  
     193    /* Apply the operator.  */
     194  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     195    TEST_VRSQRTS(, float, f, 16, 4);
     196    TEST_VRSQRTS(q, float, f, 16, 8);
     197  #endif
     198    TEST_VRSQRTS(, float, f, 32, 2);
     199    TEST_VRSQRTS(q, float, f, 32, 4);
     200  
     201  #undef CMT
     202  #define CMT " only FP special (infinity, 0)"
     203  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     204    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT);
     205    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT);
     206  #endif
     207    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT);
     208    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT);
     209  }
     210  
     211  int main (void)
     212  {
            /* Run all VRSQRTS/VRSQRTSQ tests.  main itself always returns 0;
               any failure reporting is presumably done by the CHECK_FP calls
               inside exec_vrsqrts -- confirm in arm-neon-ref.h.  */
     213    exec_vrsqrts ();
     214    return 0;
     215  }