1  /* { dg-do run } */
       2  /* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
       3  /* { dg-add-options arm_v8_2a_fp16_neon } */
       4  /* { dg-skip-if "" { arm*-*-* } } */
       5  
       6  #include <arm_neon.h>
       7  #include "arm-neon-ref.h"
       8  #include "compute-ref-data.h"
       9  
      /* FP16_C casts a constant to __fp16.  The argument is not parenthesized
         inside the expansion; every use below passes a single numeric literal
         (optionally with a unary minus), so the cast applies to the whole
         argument.  */
      10  #define FP16_C(a) ((__fp16) a)
          /* A..D: elements 0-3 of buf_src_1 (multiplicand vector) in
             exec_vmul_lane_f16, for both the 4-lane and the 8-lane variant.  */
      11  #define A FP16_C (13.4)
      12  #define B FP16_C (-56.8)
      13  #define C FP16_C (-34.8)
      14  #define D FP16_C (12)
          /* E..H: elements 0-3 of buf_src_2 (the vector a lane is selected
             from), for both the 4-lane and the 8-lane variant.  */
      15  #define E FP16_C (63.1)
      16  #define F FP16_C (19.1)
      17  #define G FP16_C (-4.8)
      18  #define H FP16_C (77)
      19  
          /* I..L: elements 4-7 of the 8-lane buf_src_1.  */
      20  #define I FP16_C (0.7)
      21  #define J FP16_C (-78)
      22  #define K FP16_C (11.23)
      23  #define L FP16_C (98)
          /* M..P: elements 4-7 of the 8-lane buf_src_2, i.e. the values picked
             by lane indices 4-7 in the laneq tests.  */
      24  #define M FP16_C (87.1)
      25  #define N FP16_C (-8)
      26  #define O FP16_C (-1.1)
      27  #define P FP16_C (-9.7)
      28  
      29  /* Expected results for vmul_lane.
             expectedN_static (4 elements) is checked by CHECK_FP after
             vmul_lane_f16 is called on {A, B, C, D} with lane N of
             {E, F, G, H}.  Each entry is a 16-bit hexadecimal value annotated
             with the product it stands for (presumably the half-precision
             bit pattern of that product -- confirm against the hfloat
             definition in arm-neon-ref.h).  */
      30  VECT_VAR_DECL (expected0_static, hfloat, 16, 4) []
      31    = { 0x629B /* A * E.  */,
      32        0xEB00 /* B * E.  */,
      33        0xE84A /* C * E.  */,
      34        0x61EA /* D * E.  */ };
      35  
      36  VECT_VAR_DECL (expected1_static, hfloat, 16, 4) []
      37    = { 0x5BFF /* A * F.  */,
      38        0xE43D /* B * F.  */,
      39        0xE131 /* C * F.  */,
      40        0x5B29 /* D * F.  */ };
      41  
      42  VECT_VAR_DECL (expected2_static, hfloat, 16, 4) []
      43    = { 0xD405 /* A * G.  */,
      44        0x5C43 /* B * G.  */,
      45        0x5939 /* C * G.  */,
      46        0xD334 /* D * G.  */ };
      47  
      48  VECT_VAR_DECL (expected3_static, hfloat, 16, 4) []
      49    = { 0x6408 /* A * H.  */,
      50        0xEC46 /* B * H.  */,
      51        0xE93C /* C * H.  */,
      52        0x6338 /* D * H.  */ };
      53  
      54  /* Expected results for vmulq_lane.
             expectedN_static (8 elements) is checked by CHECK_FP after
             vmulq_lane_f16 is called on {A, B, C, D, I, J, K, L} with lane N
             of the 4-lane vector {E, F, G, H}.  The base names repeat those
             of the 4-element vmul_lane tables; only the lane count passed to
             VECT_VAR_DECL differs (presumably that count is pasted into the
             generated identifier so the two sets do not clash -- confirm in
             arm-neon-ref.h).  The first four entries of each table equal the
             corresponding 4-element table.  */
      55  VECT_VAR_DECL (expected0_static, hfloat, 16, 8) []
      56    = { 0x629B /* A * E.  */,
      57        0xEB00 /* B * E.  */,
      58        0xE84A /* C * E.  */,
      59        0x61EA /* D * E.  */,
      60        0x5186 /* I * E.  */,
      61        0xECCE /* J * E.  */,
      62        0x6189 /* K * E.  */,
      63        0x6E0A /* L * E.  */ };
      64  
      65  VECT_VAR_DECL (expected1_static, hfloat, 16, 8) []
      66    = { 0x5BFF /* A * F.  */,
      67        0xE43D /* B * F.  */,
      68        0xE131 /* C * F.  */,
      69        0x5B29 /* D * F.  */,
      70        0x4AAF /* I * F.  */,
      71        0xE5D1 /* J * F.  */,
      72        0x5AB3 /* K * F.  */,
      73        0x674F /* L * F.  */ };
      74  
      75  VECT_VAR_DECL (expected2_static, hfloat, 16, 8) []
      76    = { 0xD405 /* A * G.  */,
      77        0x5C43 /* B * G.  */,
      78        0x5939 /* C * G.  */,
      79        0xD334 /* D * G.  */,
      80        0xC2B9 /* I * G.  */,
      81        0x5DDA /* J * G.  */,
      82        0xD2BD /* K * G.  */,
      83        0xDF5A /* L * G.  */ };
      84  
      85  VECT_VAR_DECL (expected3_static, hfloat, 16, 8) []
      86    = { 0x6408 /* A * H.  */,
      87        0xEC46 /* B * H.  */,
      88        0xE93C /* C * H.  */,
      89        0x6338 /* D * H.  */,
      90        0x52BD /* I * H.  */,
      91        0xEDDE /* J * H.  */,
      92        0x62C1 /* K * H.  */,
      93        0x6F5E /* L * H.  */ };
      94  
      95  /* Expected results for vmul_laneq.
             expected_laneqN_static (4 elements) is checked by CHECK_FP after
             vmul_laneq_f16 is called on {A, B, C, D} with lane N of the
             8-lane vector {E, F, G, H, M, N, O, P}.  Tables 0-3 therefore
             carry the same values as the vmul_lane tables; tables 4-7 cover
             the lanes that only exist in the 8-lane second operand.  */
      96  VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 4) []
      97    = { 0x629B /* A * E.  */,
      98        0xEB00 /* B * E.  */,
      99        0xE84A /* C * E.  */,
     100        0x61EA /* D * E.  */ };
     101  
     102  VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 4) []
     103    = { 0x5BFF /* A * F.  */,
     104        0xE43D /* B * F.  */,
     105        0xE131 /* C * F.  */,
     106        0x5B29 /* D * F.  */ };
     107  
     108  VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 4) []
     109    = { 0xD405 /* A * G.  */,
     110        0x5C43 /* B * G.  */,
     111        0x5939 /* C * G.  */,
     112        0xD334 /* D * G.  */ };
     113  
     114  VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 4) []
     115    = { 0x6408 /* A * H.  */,
     116        0xEC46 /* B * H.  */,
     117        0xE93C /* C * H.  */,
     118        0x6338 /* D * H.  */ };
     119  
     120  VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 4) []
     121    = { 0x648F /* A * M.  */,
     122        0xECD5 /* B * M.  */,
     123        0xE9ED /* C * M.  */,
     124        0x6416 /* D * M.  */ };
     125  
     126  VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 4) []
     127    = { 0xD6B3 /* A * N.  */,
     128        0x5F1A /* B * N.  */,
     129        0x5C5A /* C * N.  */,
     130        0xD600 /* D * N.  */ };
     131  
     132  VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 4) []
     133    = { 0xCB5E /* A * O.  */,
     134        0x53CF /* B * O.  */,
     135        0x50C9 /* C * O.  */,
     136        0xCA99 /* D * O.  */ };
     137  
     138  VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 4) []
     139    = { 0xD810 /* A * P.  */,
     140        0x604F /* B * P.  */,
     141        0x5D47 /* C * P.  */,
     142        0xD747 /* D * P.  */ };
     143  
     144  /* Expected results for vmulq_laneq.
             expected_laneqN_static (8 elements) is checked by CHECK_FP after
             vmulq_laneq_f16 is called on {A, B, C, D, I, J, K, L} with lane N
             of the 8-lane vector {E, F, G, H, M, N, O, P}.  Tables 0-3 carry
             the same values as the vmulq_lane tables; tables 4-7 cover lanes
             4-7 (M, N, O, P).  Identical encodings in different tables are
             possible after rounding, e.g. D * E and J * P are both 0x61EA.  */
     145  VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 8) []
     146    = { 0x629B /* A * E.  */,
     147        0xEB00 /* B * E.  */,
     148        0xE84A /* C * E.  */,
     149        0x61EA /* D * E.  */,
     150        0x5186 /* I * E.  */,
     151        0xECCE /* J * E.  */,
     152        0x6189 /* K * E.  */,
     153        0x6E0A /* L * E.  */ };
     154  
     155  VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 8) []
     156    = { 0x5BFF /* A * F.  */,
     157        0xE43D /* B * F.  */,
     158        0xE131 /* C * F.  */,
     159        0x5B29 /* D * F.  */,
     160        0x4AAF /* I * F.  */,
     161        0xE5D1 /* J * F.  */,
     162        0x5AB3 /* K * F.  */,
     163        0x674F /* L * F.  */ };
     164  
     165  VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 8) []
     166    = { 0xD405 /* A * G.  */,
     167        0x5C43 /* B * G.  */,
     168        0x5939 /* C * G.  */,
     169        0xD334 /* D * G.  */,
     170        0xC2B9 /* I * G.  */,
     171        0x5DDA /* J * G.  */,
     172        0xD2BD /* K * G.  */,
     173        0xDF5A /* L * G.  */ };
     174  
     175  VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 8) []
     176    = { 0x6408 /* A * H.  */,
     177        0xEC46 /* B * H.  */,
     178        0xE93C /* C * H.  */,
     179        0x6338 /* D * H.  */,
     180        0x52BD /* I * H.  */,
     181        0xEDDE /* J * H.  */,
     182        0x62C1 /* K * H.  */,
     183        0x6F5E /* L * H.  */ };
     184  
     185  VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 8) []
     186    = { 0x648F /* A * M.  */,
     187        0xECD5 /* B * M.  */,
     188        0xE9ED /* C * M.  */,
     189        0x6416 /* D * M.  */,
     190        0x53A0 /* I * M.  */,
     191        0xEEA3 /* J * M.  */,
     192        0x63A4 /* K * M.  */,
     193        0x702B /* L * M.  */ };
     194  
     195  VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 8) []
     196    = { 0xD6B3 /* A * N.  */,
     197        0x5F1A /* B * N.  */,
     198        0x5C5A /* C * N.  */,
     199        0xD600 /* D * N.  */,
     200        0xC59A /* I * N.  */,
     201        0x60E0 /* J * N.  */,
     202        0xD59D /* K * N.  */,
     203        0xE220 /* L * N.  */ };
     204  
     205  VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 8) []
     206    = { 0xCB5E /* A * O.  */,
     207        0x53CF /* B * O.  */,
     208        0x50C9 /* C * O.  */,
     209        0xCA99 /* D * O.  */,
     210        0xBA29 /* I * O.  */,
     211        0x555C /* J * O.  */,
     212        0xCA2C /* K * O.  */,
     213        0xD6BC /* L * O.  */ };
     214  
     215  VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 8) []
     216    = { 0xD810 /* A * P.  */,
     217        0x604F /* B * P.  */,
     218        0x5D47 /* C * P.  */,
     219        0xD747 /* D * P.  */,
     220        0xC6CB /* I * P.  */,
     221        0x61EA /* J * P.  */,
     222        0xD6CF /* K * P.  */,
     223        0xE36E /* L * P.  */ };
     224  
     /* Runs the FP16 multiply-by-lane checks in four sections, one per
        intrinsic: vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16 and
        vmulq_laneq_f16.  Each step calls the intrinsic with a literal lane
        index, stores the product vector into the shared `result' buffer with
        vst1_f16 / vst1q_f16, and has CHECK_FP compare that buffer with the
        expected table for the same lane.

        The sections are order-dependent: variables declared in an earlier
        section (vsrc_1/vsrc_2 and vector_res in their 4- and 8-lane forms)
        are reused, not redeclared, by later ones.

        NOTE(review): the lane index is spelled out in every call, presumably
        because the intrinsics need a constant lane argument -- confirm
        before replacing the unrolled calls with a loop.  Takes no arguments
        and returns nothing; how a mismatch is reported depends on CHECK_FP
        (arm-neon-ref.h).  */
     225  void exec_vmul_lane_f16 (void)
     226  {
            /* Section 1: vmul_lane_f16 -- 4-lane {A, B, C, D} times lane 0..3
               of 4-lane {E, F, G, H}.  */
     227  #undef TEST_MSG
     228  #define TEST_MSG "VMUL_LANE (FP16)"
            /* clean_results is defined outside this file; presumably it
               resets the shared result buffers -- confirm.  */
     229    clean_results ();
     230  
     231    DECL_VARIABLE(vsrc_1, float, 16, 4);
     232    DECL_VARIABLE(vsrc_2, float, 16, 4);
     233    VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D};
     234    VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H};
     235    VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4);
     236    VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4);
            /* The 4-lane vector_res is declared here and only assigned to
               in the remaining 4-lane steps (sections 1 and 3).  */
     237    DECL_VARIABLE (vector_res, float, 16, 4)
     238      = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     239  		     VECT_VAR (vsrc_2, float, 16, 4), 0);
     240    vst1_f16 (VECT_VAR (result, float, 16, 4),
     241  	    VECT_VAR (vector_res, float, 16, 4));
     242  
     243    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, "");
     244  
     245    VECT_VAR (vector_res, float, 16, 4)
     246      = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     247  		     VECT_VAR (vsrc_2, float, 16, 4), 1);
     248    vst1_f16 (VECT_VAR (result, float, 16, 4),
     249  	    VECT_VAR (vector_res, float, 16, 4));
     250  
     251    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, "");
     252  
     253    VECT_VAR (vector_res, float, 16, 4)
     254      = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     255  		     VECT_VAR (vsrc_2, float, 16, 4), 2);
     256    vst1_f16 (VECT_VAR (result, float, 16, 4),
     257  	    VECT_VAR (vector_res, float, 16, 4));
     258  
     259    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, "");
     260  
     261    VECT_VAR (vector_res, float, 16, 4)
     262      = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     263  		     VECT_VAR (vsrc_2, float, 16, 4), 3);
     264    vst1_f16 (VECT_VAR (result, float, 16, 4),
     265  	    VECT_VAR (vector_res, float, 16, 4));
     266  
     267    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, "");
     268  
            /* Section 2: vmulq_lane_f16 -- 8-lane {A, B, C, D, I, J, K, L}
               times lane 0..3 of the 4-lane vsrc_2 loaded in section 1.  */
     269  #undef TEST_MSG
     270  #define TEST_MSG "VMULQ_LANE (FP16)"
     271    clean_results ();
     272  
     273    DECL_VARIABLE(vsrc_1, float, 16, 8);
     274    VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L};
     275    VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8);
            /* The 8-lane vector_res is declared here and only assigned to
               in the remaining 8-lane steps (sections 2 and 4).  */
     276    DECL_VARIABLE (vector_res, float, 16, 8)
     277      = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     278  		      VECT_VAR (vsrc_2, float, 16, 4), 0);
     279  
     280    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     281  	     VECT_VAR (vector_res, float, 16, 8));
     282  
     283    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, "");
     284  
     285    VECT_VAR (vector_res, float, 16, 8)
     286      = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     287  		      VECT_VAR (vsrc_2, float, 16, 4), 1);
     288    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     289  	     VECT_VAR (vector_res, float, 16, 8));
     290  
     291    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, "");
     292  
     293    VECT_VAR (vector_res, float, 16, 8)
     294      = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     295  		      VECT_VAR (vsrc_2, float, 16, 4), 2);
     296    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     297  	     VECT_VAR (vector_res, float, 16, 8));
     298  
     299    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, "");
     300  
     301    VECT_VAR (vector_res, float, 16, 8)
     302      = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     303  		      VECT_VAR (vsrc_2, float, 16, 4), 3);
     304    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     305  	     VECT_VAR (vector_res, float, 16, 8));
     306  
     307    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, "");
     308  
            /* Section 3: vmul_laneq_f16 -- the 4-lane vsrc_1 from section 1
               times lane 0..7 of a new 8-lane vsrc_2
               {E, F, G, H, M, N, O, P}.  */
     309  #undef TEST_MSG
     310  #define TEST_MSG "VMUL_LANEQ (FP16)"
     311    clean_results ();
     312  
     313    DECL_VARIABLE(vsrc_2, float, 16, 8);
     314    VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P};
     315    VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8);
     316    VECT_VAR (vector_res, float, 16, 4)
     317      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     318  		      VECT_VAR (vsrc_2, float, 16, 8), 0);
     319    vst1_f16 (VECT_VAR (result, float, 16, 4),
     320  	    VECT_VAR (vector_res, float, 16, 4));
     321  
     322    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq0_static, "");
     323  
     324    VECT_VAR (vector_res, float, 16, 4)
     325      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     326  		      VECT_VAR (vsrc_2, float, 16, 8), 1);
     327    vst1_f16 (VECT_VAR (result, float, 16, 4),
     328  	    VECT_VAR (vector_res, float, 16, 4));
     329  
     330    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq1_static, "");
     331  
     332    VECT_VAR (vector_res, float, 16, 4)
     333      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     334  		      VECT_VAR (vsrc_2, float, 16, 8), 2);
     335    vst1_f16 (VECT_VAR (result, float, 16, 4),
     336  	    VECT_VAR (vector_res, float, 16, 4));
     337  
     338    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq2_static, "");
     339  
     340    VECT_VAR (vector_res, float, 16, 4)
     341      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     342  		      VECT_VAR (vsrc_2, float, 16, 8), 3);
     343    vst1_f16 (VECT_VAR (result, float, 16, 4),
     344  	    VECT_VAR (vector_res, float, 16, 4));
     345  
     346    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq3_static, "");
     347  
     348    VECT_VAR (vector_res, float, 16, 4)
     349      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     350  		      VECT_VAR (vsrc_2, float, 16, 8), 4);
     351    vst1_f16 (VECT_VAR (result, float, 16, 4),
     352  	    VECT_VAR (vector_res, float, 16, 4));
     353  
     354    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq4_static, "");
     355  
     356    VECT_VAR (vector_res, float, 16, 4)
     357      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     358  		      VECT_VAR (vsrc_2, float, 16, 8), 5);
     359    vst1_f16 (VECT_VAR (result, float, 16, 4),
     360  	    VECT_VAR (vector_res, float, 16, 4));
     361  
     362    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq5_static, "");
     363  
     364    VECT_VAR (vector_res, float, 16, 4)
     365      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     366  		      VECT_VAR (vsrc_2, float, 16, 8), 6);
     367    vst1_f16 (VECT_VAR (result, float, 16, 4),
     368  	    VECT_VAR (vector_res, float, 16, 4));
     369  
     370    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq6_static, "");
     371  
     372    VECT_VAR (vector_res, float, 16, 4)
     373      = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4),
     374  		      VECT_VAR (vsrc_2, float, 16, 8), 7);
     375    vst1_f16 (VECT_VAR (result, float, 16, 4),
     376  	    VECT_VAR (vector_res, float, 16, 4));
     377  
     378    CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq7_static, "");
     379  
            /* Section 4: vmulq_laneq_f16 -- the 8-lane vsrc_1 from section 2
               times lane 0..7 of the 8-lane vsrc_2 from section 3.  No new
               declarations are needed here.  */
     380  #undef TEST_MSG
     381  #define TEST_MSG "VMULQ_LANEQ (FP16)"
     382    clean_results ();
     383  
     384    VECT_VAR (vector_res, float, 16, 8)
     385      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     386  		       VECT_VAR (vsrc_2, float, 16, 8), 0);
     387    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     388  	     VECT_VAR (vector_res, float, 16, 8));
     389  
     390    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq0_static, "");
     391  
     392    VECT_VAR (vector_res, float, 16, 8)
     393      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     394  		       VECT_VAR (vsrc_2, float, 16, 8), 1);
     395    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     396  	     VECT_VAR (vector_res, float, 16, 8));
     397  
     398    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq1_static, "");
     399  
     400    VECT_VAR (vector_res, float, 16, 8)
     401      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     402  		       VECT_VAR (vsrc_2, float, 16, 8), 2);
     403    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     404  	     VECT_VAR (vector_res, float, 16, 8));
     405  
     406    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq2_static, "");
     407  
     408    VECT_VAR (vector_res, float, 16, 8)
     409      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     410  		       VECT_VAR (vsrc_2, float, 16, 8), 3);
     411    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     412  	     VECT_VAR (vector_res, float, 16, 8));
     413  
     414    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq3_static, "");
     415  
     416    VECT_VAR (vector_res, float, 16, 8)
     417      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     418  		       VECT_VAR (vsrc_2, float, 16, 8), 4);
     419    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     420  	     VECT_VAR (vector_res, float, 16, 8));
     421  
     422    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq4_static, "");
     423  
     424    VECT_VAR (vector_res, float, 16, 8)
     425      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     426  		       VECT_VAR (vsrc_2, float, 16, 8), 5);
     427    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     428  	     VECT_VAR (vector_res, float, 16, 8));
     429  
     430    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq5_static, "");
     431  
     432    VECT_VAR (vector_res, float, 16, 8)
     433      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     434  		       VECT_VAR (vsrc_2, float, 16, 8), 6);
     435    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     436  	     VECT_VAR (vector_res, float, 16, 8));
     437  
     438    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq6_static, "");
     439  
     440    VECT_VAR (vector_res, float, 16, 8)
     441      = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8),
     442  		       VECT_VAR (vsrc_2, float, 16, 8), 7);
     443    vst1q_f16 (VECT_VAR (result, float, 16, 8),
     444  	     VECT_VAR (vector_res, float, 16, 8));
     445  
     446    CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq7_static, "");
     447  }
     448  
     /* Test entry point: runs exec_vmul_lane_f16 once and returns 0.  main
        itself never returns a failure status; a failing comparison is
        presumably reported from inside CHECK_FP (arm-neon-ref.h) -- confirm
        there.  */
     449  int
     450  main (void)
     451  {
     452    exec_vmul_lane_f16 ();
     453    return 0;
     454  }