1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  #ifdef __ARM_FEATURE_FMA
       6  /* Expected results, as hex words (presumably the raw bit patterns of the float outputs -- confirm against arm-neon-ref.h).  */
       7  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) /* Tables for the fp16 variants.  */
       8  VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xe206, 0xe204, 0xe202, 0xe200 };
       9  VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe455, 0xe454, 0xe453, 0xe452,
      10  					      0xe451, 0xe450, 0xe44f, 0xe44e };
      11  #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
      12  VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc440ca3d, 0xc4408a3d };
      13  VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8,
      14  					   0xc48a5eb8, 0xc48a3eb8 };
      15  #ifdef __aarch64__ /* The 64-bit float vector is only exercised on AArch64.  */
      16  VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520,
      17  					   0xc089fee1532b8520 };
      18  #endif /* __aarch64__ */
      19  
      20  #define TEST_MSG "VFMS/VFMSQ"
      21  /* Exercise vfms/vfmsq on float vectors and hand the stored results to CHECK_FP.  */
      22  void exec_vfms (void)
      23  {
      24    /* Basic test: vector_res = vfms (vector1, vector2, vector3), then store it to result.  */
      25  #define TEST_VFMS(Q, T1, T2, W, N)					\
      26    VECT_VAR(vector_res, T1, W, N) =					\
      27      vfms##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
      28  		      VECT_VAR(vector2, T1, W, N),			\
      29  		      VECT_VAR(vector3, T1, W, N));			\
      30    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
      31    /* Per-target result checks and declarations: f32 always, f64 only on AArch64.  */
      32  #ifdef __aarch64__
      33  #define CHECK_VFMS_RESULTS(test_name,comment)				\
      34    {									\
      35      CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);	\
      36      CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);	\
      37      CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment);	\
      38    }
      39  #define DECL_VFMS_VAR(VAR)			\
      40    DECL_VARIABLE(VAR, float, 32, 2);		\
      41    DECL_VARIABLE(VAR, float, 32, 4);		\
      42    DECL_VARIABLE(VAR, float, 64, 2);
      43  #else /* !__aarch64__ */
      44  #define CHECK_VFMS_RESULTS(test_name,comment)				\
      45    {									\
      46      CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);	\
      47      CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);	\
      48    }
      49  #define DECL_VFMS_VAR(VAR)			\
      50    DECL_VARIABLE(VAR, float, 32, 2);		\
      51    DECL_VARIABLE(VAR, float, 32, 4);
      52  #endif /* __aarch64__ */
      53    /* The fp16 variables are declared by hand; DECL_VFMS_VAR does not cover them.  */
      54  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      55    DECL_VARIABLE(vector1, float, 16, 4);
      56    DECL_VARIABLE(vector2, float, 16, 4);
      57    DECL_VARIABLE(vector3, float, 16, 4);
      58    DECL_VARIABLE(vector_res, float, 16, 4);
      59  
      60    DECL_VARIABLE(vector1, float, 16, 8);
      61    DECL_VARIABLE(vector2, float, 16, 8);
      62    DECL_VARIABLE(vector3, float, 16, 8);
      63    DECL_VARIABLE(vector_res, float, 16, 8);
      64  #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
      65    /* Three operands and one result vector for each type DECL_VFMS_VAR covers.  */
      66    DECL_VFMS_VAR(vector1);
      67    DECL_VFMS_VAR(vector2);
      68    DECL_VFMS_VAR(vector3);
      69    DECL_VFMS_VAR(vector_res);
      70  
      71    clean_results ();
      72  
      73    /* Initialize input "vector1" from "buffer".  */
      74  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      75    VLOAD(vector1, buffer, , float, f, 16, 4);
      76    VLOAD(vector1, buffer, q, float, f, 16, 8);
      77  #endif
      78    VLOAD(vector1, buffer, , float, f, 32, 2);
      79    VLOAD(vector1, buffer, q, float, f, 32, 4);
      80  #ifdef __aarch64__
      81    VLOAD(vector1, buffer, q, float, f, 64, 2);
      82  #endif
      83  
      84    /* Choose the "vector2" init value arbitrarily.  */
      85  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      86    VDUP(vector2, , float, f, 16, 4, 9.3f);
      87    VDUP(vector2, q, float, f, 16, 8, 29.7f);
      88  #endif
      89    VDUP(vector2, , float, f, 32, 2, 9.3f);
      90    VDUP(vector2, q, float, f, 32, 4, 29.7f);
      91  #ifdef __aarch64__
      92    VDUP(vector2, q, float, f, 64, 2, 15.8f); /* NOTE(review): float literal for an f64 vector; expected[] presumably depends on it -- confirm before changing.  */
      93  #endif
      94  
      95    /* Choose the "vector3" init value arbitrarily.  */
      96  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      97    VDUP(vector3, , float, f, 16, 4, 81.2f);
      98    VDUP(vector3, q, float, f, 16, 8, 36.8f);
      99  #endif
     100    VDUP(vector3, , float, f, 32, 2, 81.2f);
     101    VDUP(vector3, q, float, f, 32, 4, 36.8f);
     102  #ifdef __aarch64__
     103    VDUP(vector3, q, float, f, 64, 2, 51.7f); /* NOTE(review): float literal for an f64 vector here too -- confirm before changing.  */
     104  #endif
     105  
     106    /* Execute the tests: one TEST_VFMS per vector type.  */
     107  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     108    TEST_VFMS(, float, f, 16, 4);
     109    TEST_VFMS(q, float, f, 16, 8);
     110  #endif
     111    TEST_VFMS(, float, f, 32, 2);
     112    TEST_VFMS(q, float, f, 32, 4);
     113  #ifdef __aarch64__
     114    TEST_VFMS(q, float, f, 64, 2);
     115  #endif
     116    /* Check the results: fp16 directly, the others through CHECK_VFMS_RESULTS.  */
     117  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     118    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
     119    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
     120  #endif
     121    CHECK_VFMS_RESULTS (TEST_MSG, "");
     122  }
     123  #endif
     124  /* Entry point: run exec_vfms when built with FMA support; always return 0.  */
     125  int main (void)
     126  {
     127  #ifdef __ARM_FEATURE_FMA /* exec_vfms is only defined under this same guard.  */
     128    exec_vfms ();
     129  #endif /* __ARM_FEATURE_FMA */
     130    return 0;
     131  }