1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  #include <math.h>
       5  
       6  /* Expected results for the basic test.  Inputs, as set in exec_vrsqrte:
             0x12345678 (uint 32x2), 0xABCDEF10 (uint 32x4), 25.799999f (float
             16x4 and 32x2) and 18.2f (float 16x8 and 32x4).  The hfloat tables
             hold the results as raw bit patterns: 0x3e498000 decodes to about
             0.1968, close to 1/sqrt(25.8).  */
       7  VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
       8  VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000,
       9  					 0x9c800000, 0x9c800000 };
      10  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      11  VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x324c, 0x324c, 0x324c, 0x324c };
      12  VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x3380, 0x3380, 0x3380, 0x3380,
      13  					      0x3380, 0x3380, 0x3380, 0x3380 };
      14  #endif
      15  VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 };
      16  VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000,
      17  					   0x3e700000, 0x3e700000 };
      18  
      19  /* Expected results with large uint #1: inputs 0xFFFFFFFF (32x2) and
             0x89081234 (32x4).  */
      20  VECT_VAR_DECL(expected_1,uint,32,2) [] = { 0x80000000, 0x80000000 };
      21  VECT_VAR_DECL(expected_1,uint,32,4) [] = { 0xae800000, 0xae800000,
      22  					   0xae800000, 0xae800000 };
      23  
      24  /* Expected results with large uint #2: inputs 0x80000000 (32x2) and
             0x4ABCDEF0 (32x4).  */
      25  VECT_VAR_DECL(expected_2,uint,32,2) [] = { 0xb4800000, 0xb4800000 };
      26  VECT_VAR_DECL(expected_2,uint,32,4) [] = { 0xed000000, 0xed000000,
      27  					   0xed000000, 0xed000000 };
      28  
      29  /* Expected results with FP special input values (NaN, 0): the 16x4 and
             32x2 vectors are fed NaN, the 16x8 and 32x4 vectors 0.0.
             0x7e00 / 0x7fc00000 are NaN bit patterns and 0x7c00 / 0x7f800000
             are +infinity in IEEE half / single precision.  */
      30  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      31  VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00,
      32  						  0x7e00, 0x7e00 };
      33  VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x7c00, 0x7c00,
      34  						  0x7c00, 0x7c00,
      35  						  0x7c00, 0x7c00,
      36  						  0x7c00, 0x7c00 };
      37  #endif
      38  VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 };
      39  VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
      40  					       0x7f800000, 0x7f800000 };
      41  
      42  /* Expected results with FP special input values
      43     (negative, infinity): the 16x4 and 32x2 vectors are fed -1.0f and
             expect a NaN bit pattern; the 16x8 and 32x4 vectors are fed
             HUGE_VALF and expect zero.  */
      44  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      45  VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x7e00, 0x7e00,
      46  						  0x7e00, 0x7e00 };
      47  VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0,
      48  						  0x0, 0x0, 0x0 };
      49  #endif
      50  VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 };
      51  VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      52  
      53  /* Expected results with FP special input values
      54     (-0, -infinity): the 16x4 and 32x2 vectors are fed -0.0f and expect
             the -infinity bit pattern (0xfc00 / 0xff800000); the 16x8 and 32x4
             vectors are fed -HUGE_VALF and expect a NaN bit pattern.  */
      55  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      56  VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0xfc00, 0xfc00,
      57  						  0xfc00, 0xfc00 };
      58  VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
      59  						  0x7e00, 0x7e00,
      60  						  0x7e00, 0x7e00,
      61  						  0x7e00, 0x7e00 };
      62  #endif
      63  VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 };
      64  VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
      65  					       0x7fc00000, 0x7fc00000 };
      66  
      67  #define TEST_MSG "VRSQRTE/VRSQRTEQ"
          /* Exercise the vrsqrte/vrsqrteq (reciprocal square-root estimate)
             intrinsics and compare the stored results against the expected*
             tables defined earlier in this file.  Phases, in order: a basic run
             on every vector type, two uint32-only runs with large inputs, then
             three float-only runs with special values (NaN/0, negative/infinity,
             -0/-infinity).  The float16 variants are built only when
             __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.  Takes no arguments
             and returns nothing.  */
      68  void exec_vrsqrte(void)
      69  {
            /* Not referenced directly in this function; presumably needed by the
               CHECK/CHECK_FP macro expansions -- TODO confirm against
               arm-neon-ref.h.  */
      70    int i;
      71  
      72    /* Basic test: y=vrsqrte(x), then store the result.
               Q is empty or q (selects vrsqrte vs. vrsqrteq), T1 is the type
               name used to build the variable names, T2 the type letter of the
               intrinsic suffix, W the element width and N the lane count.  */
      73  #define TEST_VRSQRTE(Q, T1, T2, W, N)			\
      74    VECT_VAR(vector_res, T1, W, N) =			\
      75      vrsqrte##Q##_##T2##W(VECT_VAR(vector, T1, W, N));	\
      76    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),		\
      77  		    VECT_VAR(vector_res, T1, W, N))
      78  
            /* Input vectors, one per tested type.  */
      79    DECL_VARIABLE(vector, uint, 32, 2);
      80  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      81    DECL_VARIABLE(vector, float, 16, 4);
      82  #endif
      83    DECL_VARIABLE(vector, float, 32, 2);
      84    DECL_VARIABLE(vector, uint, 32, 4);
      85  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      86    DECL_VARIABLE(vector, float, 16, 8);
      87  #endif
      88    DECL_VARIABLE(vector, float, 32, 4);
      89  
            /* Matching vectors that receive the intrinsic's return value before
               TEST_VRSQRTE stores it into the result buffer.  */
      90    DECL_VARIABLE(vector_res, uint, 32, 2);
      91  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      92    DECL_VARIABLE(vector_res, float, 16, 4);
      93  #endif
      94    DECL_VARIABLE(vector_res, float, 32, 2);
      95    DECL_VARIABLE(vector_res, uint, 32, 4);
      96  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      97    DECL_VARIABLE(vector_res, float, 16, 8);
      98  #endif
      99    DECL_VARIABLE(vector_res, float, 32, 4);
     100  
            /* Helper defined outside this file; presumably resets the shared
               result buffers before the first check -- TODO confirm.  */
     101    clean_results ();
     102  
     103    /* Choose init value arbitrarily.  */
     104    VDUP(vector, , uint, u, 32, 2, 0x12345678);
     105  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     106    VDUP(vector, , float, f, 16, 4, 25.799999f);
     107  #endif
     108    VDUP(vector, , float, f, 32, 2, 25.799999f);
     109    VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10);
     110  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     111    VDUP(vector, q, float, f, 16, 8, 18.2f);
     112  #endif
     113    VDUP(vector, q, float, f, 32, 4, 18.2f);
     114  
     115    /* Apply the operator.  */
     116    TEST_VRSQRTE(, uint, u, 32, 2);
     117  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     118    TEST_VRSQRTE(, float, f, 16, 4);
     119  #endif
     120    TEST_VRSQRTE(, float, f, 32, 2);
     121    TEST_VRSQRTE(q, uint, u, 32, 4);
     122  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     123    TEST_VRSQRTE(q, float, f, 16, 8);
     124  #endif
     125    TEST_VRSQRTE(q, float, f, 32, 4);
     126  
            /* CMT is the per-phase label passed as the last CHECK/CHECK_FP
               argument; it is empty for the basic phase and redefined before
               each later phase.  */
     127  #define CMT ""
     128    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
     129    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
     130  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     131    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT);
     132    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT);
     133  #endif
     134    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT);
     135    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT);
     136  
     137  
     138    /* uint32-only phase: the FP variants are not run on these inputs.  */
     139    /* Use inputs with various values of bits 30 and 31: 0xFFFFFFFF has
               both bits set, 0x89081234 has bit 31 set and bit 30 clear.  */
     140    VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
     141    VDUP(vector, q, uint, u, 32, 4, 0x89081234);
     142  
     143    /* Apply the operator.  */
     144    TEST_VRSQRTE(, uint, u, 32, 2);
     145    TEST_VRSQRTE(q, uint, u, 32, 4);
     146  
     147  #undef CMT
     148  #define CMT " (large uint #1)"
     149    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_1, CMT);
     150    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_1, CMT);
     151  
     152  
     153    /* Second uint32-only phase: 0x80000000 has only bit 31 set,
               0x4ABCDEF0 has bit 30 set and bit 31 clear.  */
     154    VDUP(vector, , uint, u, 32, 2, 0x80000000);
     155    VDUP(vector, q, uint, u, 32, 4, 0x4ABCDEF0);
     156  
     157    /* Apply the operator.  */
     158    TEST_VRSQRTE(, uint, u, 32, 2);
     159    TEST_VRSQRTE(q, uint, u, 32, 4);
     160  
     161  #undef CMT
     162  #define CMT " (large uint #2)"
     163    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_2, CMT);
     164    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_2, CMT);
     165  
     166  
     167    /* Test FP variants with special input values: NaN in the non-q
               vectors, 0.0 in the q vectors.  */
     168  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     169    VDUP(vector, , float, f, 16, 4, NAN);
     170    VDUP(vector, q, float, f, 16, 8, 0.0f);
     171  #endif
     172    VDUP(vector, , float, f, 32, 2, NAN);
     173    VDUP(vector, q, float, f, 32, 4, 0.0f);
     174  
     175    /* Apply the operator.  */
     176  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     177    TEST_VRSQRTE(, float, f, 16, 4);
     178    TEST_VRSQRTE(q, float, f, 16, 8);
     179  #endif
     180    TEST_VRSQRTE(, float, f, 32, 2);
     181    TEST_VRSQRTE(q, float, f, 32, 4);
     182  
     183  #undef CMT
     184  #define CMT " FP special (NaN, 0)"
     185  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     186    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT);
     187    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT);
     188  #endif
     189    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT);
     190    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT);
     191  
     192  
     193    /* Test FP variants with special input values: a negative value
               (-1.0f) in the non-q vectors, infinity (HUGE_VALF) in the q
               vectors.  */
     194  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     195    VDUP(vector, , float, f, 16, 4, -1.0f);
     196    VDUP(vector, q, float, f, 16, 8, HUGE_VALF);
     197  #endif
     198    VDUP(vector, , float, f, 32, 2, -1.0f);
     199    VDUP(vector, q, float, f, 32, 4, HUGE_VALF);
     200  
     201    /* Apply the operator.  */
     202  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     203    TEST_VRSQRTE(, float, f, 16, 4);
     204    TEST_VRSQRTE(q, float, f, 16, 8);
     205  #endif
     206    TEST_VRSQRTE(, float, f, 32, 2);
     207    TEST_VRSQRTE(q, float, f, 32, 4);
     208  
     209  #undef CMT
     210  #define CMT " FP special (negative, infinity)"
     211  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     212    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT);
     213    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT);
     214  #endif
     215    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT);
     216    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT);
     217  
     218    /* Test FP variants with special input values: -0.0f in the non-q
               vectors, -infinity (-HUGE_VALF) in the q vectors.  */
     219  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     220    VDUP(vector, , float, f, 16, 4, -0.0f);
     221    VDUP(vector, q, float, f, 16, 8, -HUGE_VALF);
     222  #endif
     223    VDUP(vector, , float, f, 32, 2, -0.0f);
     224    VDUP(vector, q, float, f, 32, 4, -HUGE_VALF);
     225  
     226    /* Apply the operator.  */
     227  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     228    TEST_VRSQRTE(, float, f, 16, 4);
     229    TEST_VRSQRTE(q, float, f, 16, 8);
     230  #endif
     231    TEST_VRSQRTE(, float, f, 32, 2);
     232    TEST_VRSQRTE(q, float, f, 32, 4);
     233  
     234  #undef CMT
     235  #define CMT " FP special (-0, -infinity)"
     236  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     237    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT);
     238    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT);
     239  #endif
     240    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT);
     241    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT);
     242  }
     243  
     244  int main (void)
     245  {
            /* Run every VRSQRTE/VRSQRTEQ check.  The exit status is always 0
               here; a mismatch is presumably signalled from inside the
               CHECK/CHECK_FP macros (not visible in this file) -- TODO
               confirm.  */
     246    exec_vrsqrte ();
     247    return 0;
     248  }