1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  #ifdef __ARM_FEATURE_FMA
       6  
       7  /* Expected results.  */
       8  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
       9  VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x61c6, 0x61c8, 0x61ca, 0x61cc };
      10  VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x6435, 0x6436, 0x6437, 0x6438,
      11  					      0x6439, 0x643a, 0x643b, 0x643c };
      12  #endif
      13  VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d };
      14  VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8,
      15  					   0x4486deb8, 0x4486feb8 };
      16  #ifdef __aarch64__
      17  VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520,
      18  					   0x40890ee1532b8520 };
      19  #endif
      20  
      21  #define TEST_MSG "VFMA/VFMAQ"
      22  
      23  void exec_vfma (void)
      24  {
      25    /* Basic test: v4=vfma(v1,v2), then store the result.  */
      26  #define TEST_VFMA(Q, T1, T2, W, N)					\
      27    VECT_VAR(vector_res, T1, W, N) =					\
      28      vfma##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
      29  		      VECT_VAR(vector2, T1, W, N),			\
      30  		      VECT_VAR(vector3, T1, W, N));			\
      31    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
      32  
      33  #ifdef __aarch64__
      34  #define CHECK_VFMA_RESULTS(test_name,comment)				\
      35    {									\
      36      CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);	\
      37      CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);	\
      38      CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment);	\
      39    }
      40  #define DECL_VFMA_VAR(VAR)			\
      41    DECL_VARIABLE(VAR, float, 32, 2);		\
      42    DECL_VARIABLE(VAR, float, 32, 4);		\
      43    DECL_VARIABLE(VAR, float, 64, 2);
      44  #else
      45  #define CHECK_VFMA_RESULTS(test_name,comment)				\
      46    {									\
      47      CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);	\
      48      CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);	\
      49    }
      50  #define DECL_VFMA_VAR(VAR)			\
      51    DECL_VARIABLE(VAR, float, 32, 2);		\
      52    DECL_VARIABLE(VAR, float, 32, 4);
      53  #endif
      54  
      55  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      56    DECL_VARIABLE(vector1, float, 16, 4);
      57    DECL_VARIABLE(vector2, float, 16, 4);
      58    DECL_VARIABLE(vector3, float, 16, 4);
      59    DECL_VARIABLE(vector_res, float, 16, 4);
      60  
      61    DECL_VARIABLE(vector1, float, 16, 8);
      62    DECL_VARIABLE(vector2, float, 16, 8);
      63    DECL_VARIABLE(vector3, float, 16, 8);
      64    DECL_VARIABLE(vector_res, float, 16, 8);
      65  #endif
      66  
      67    DECL_VFMA_VAR(vector1);
      68    DECL_VFMA_VAR(vector2);
      69    DECL_VFMA_VAR(vector3);
      70    DECL_VFMA_VAR(vector_res);
      71  
      72    clean_results ();
      73  
      74    /* Initialize input "vector1" from "buffer".  */
      75  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      76    VLOAD(vector1, buffer, , float, f, 16, 4);
      77    VLOAD(vector1, buffer, q, float, f, 16, 8);
      78  #endif
      79    VLOAD(vector1, buffer, , float, f, 32, 2);
      80    VLOAD(vector1, buffer, q, float, f, 32, 4);
      81  #ifdef __aarch64__
      82    VLOAD(vector1, buffer, q, float, f, 64, 2);
      83  #endif
      84  
      85    /* Choose init value arbitrarily.  */
      86  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      87    VDUP(vector2, , float, f, 16, 4, 9.3f);
      88    VDUP(vector2, q, float, f, 16, 8, 29.7f);
      89  #endif
      90    VDUP(vector2, , float, f, 32, 2, 9.3f);
      91    VDUP(vector2, q, float, f, 32, 4, 29.7f);
      92  #ifdef __aarch64__
      93    VDUP(vector2, q, float, f, 64, 2, 15.8f);
      94  #endif
      95  
      96    /* Choose init value arbitrarily.  */
      97  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
      98    VDUP(vector3, , float, f, 16, 4, 81.2f);
      99    VDUP(vector3, q, float, f, 16, 8, 36.8f);
     100  #endif
     101    VDUP(vector3, , float, f, 32, 2, 81.2f);
     102    VDUP(vector3, q, float, f, 32, 4, 36.8f);
     103  #ifdef __aarch64__
     104    VDUP(vector3, q, float, f, 64, 2, 51.7f);
     105  #endif
     106  
     107    /* Execute the tests.  */
     108  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     109    TEST_VFMA(, float, f, 16, 4);
     110    TEST_VFMA(q, float, f, 16, 8);
     111  #endif
     112    TEST_VFMA(, float, f, 32, 2);
     113    TEST_VFMA(q, float, f, 32, 4);
     114  #ifdef __aarch64__
     115    TEST_VFMA(q, float, f, 64, 2);
     116  #endif
     117  
     118  #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
     119    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
     120    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
     121  #endif
     122    CHECK_VFMA_RESULTS (TEST_MSG, "");
     123  }
     124  #endif
     125  
     126  int main (void)
     127  {
     128  #ifdef __ARM_FEATURE_FMA
     129    exec_vfma ();
     130  #endif
     131    return 0;
     132  }