1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  /* Expected results for vst2, chunk 0.
             Chunk 0 is the leading part of result_bis_2 that TEST_VSTX_LANE
             copies into result right after the vst2_lane/vst2q_lane store.
             result_bis_2 is cleared with memset before the store, so every
             element the store leaves untouched is expected to read back as
             0x0: in each table below only the first two elements are
             non-zero.  The hfloat tables are written as hexadecimal values
             and are compared through CHECK_FP with PRIx16/PRIx32.  */
       6  VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
       7  					     0x0, 0x0, 0x0, 0x0 };
       8  VECT_VAR_DECL(expected_st2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
       9  VECT_VAR_DECL(expected_st2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      10  VECT_VAR_DECL(expected_st2_0,uint,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
      11  					      0x0, 0x0, 0x0, 0x0 };
      12  VECT_VAR_DECL(expected_st2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
      13  VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      14  VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
      15  					      0x0, 0x0, 0x0, 0x0 };
      16  VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
      17  VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 };
      18  VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
      19  VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
      20  					      0x0, 0x0, 0x0, 0x0 };
      21  VECT_VAR_DECL(expected_st2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x0, 0x0 };
      22  VECT_VAR_DECL(expected_st2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
      23  					       0x0, 0x0, 0x0, 0x0 };
      24  VECT_VAR_DECL(expected_st2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
      25  					       0x0, 0x0 };
      26  VECT_VAR_DECL(expected_st2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
      27  					       0x0, 0x0, 0x0, 0x0 };
      28  VECT_VAR_DECL(expected_st2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0x0, 0x0,
      29  						 0x0, 0x0, 0x0, 0x0 };
      30  VECT_VAR_DECL(expected_st2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
      31  						 0x0, 0x0 };
      32  
      33  /* Expected results for vst2, chunk 1.
             Chunk 1 is the part of result_bis_2 starting at element N, which
             TEST_ALL_EXTRA_CHUNKS(2, 1) copies into result.  Every table is
             all 0x0: the two stored elements already fit in chunk 0, so this
             chunk is expected to still hold the zeros written by memset.  */
      34  VECT_VAR_DECL(expected_st2_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      35  					     0x0, 0x0, 0x0, 0x0 };
      36  VECT_VAR_DECL(expected_st2_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      37  VECT_VAR_DECL(expected_st2_1,int,32,2) [] = { 0x0, 0x0 };
      38  VECT_VAR_DECL(expected_st2_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      39  					      0x0, 0x0, 0x0, 0x0 };
      40  VECT_VAR_DECL(expected_st2_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      41  VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 };
      42  VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      43  					      0x0, 0x0, 0x0, 0x0 };
      44  VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      45  VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      46  VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 };
      47  VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
      48  					      0x0, 0x0, 0x0, 0x0 };
      49  VECT_VAR_DECL(expected_st2_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      50  VECT_VAR_DECL(expected_st2_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
      51  					       0x0, 0x0, 0x0, 0x0 };
      52  VECT_VAR_DECL(expected_st2_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      53  VECT_VAR_DECL(expected_st2_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
      54  					       0x0, 0x0, 0x0, 0x0 };
      55  VECT_VAR_DECL(expected_st2_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
      56  						 0x0, 0x0, 0x0, 0x0 };
      57  VECT_VAR_DECL(expected_st2_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      58  
      59  /* Expected results for vst3, chunk 0.
             Chunk 0 is the leading part of result_bis_3 that TEST_VSTX_LANE
             copies into result right after the vst3_lane/vst3q_lane store.
             Tables with four or more elements expect three non-zero leading
             elements followed by 0x0 (the memset value).  The two-element
             (32,2) tables only have room for the first two values; the third
             one is expected in chunk 1.  */
      60  VECT_VAR_DECL(expected_st3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
      61  					     0x0, 0x0, 0x0, 0x0 };
      62  VECT_VAR_DECL(expected_st3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
      63  VECT_VAR_DECL(expected_st3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      64  VECT_VAR_DECL(expected_st3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
      65  					      0x0, 0x0, 0x0, 0x0 };
      66  VECT_VAR_DECL(expected_st3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
      67  VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      68  VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
      69  					      0x0, 0x0, 0x0, 0x0 };
      70  VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
      71  VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 };
      72  VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
      73  VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
      74  					      0x0, 0x0, 0x0, 0x0 };
      75  VECT_VAR_DECL(expected_st3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
      76  					      0xfffffff2, 0x0 };
      77  VECT_VAR_DECL(expected_st3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
      78  					       0x0, 0x0, 0x0, 0x0 };
      79  VECT_VAR_DECL(expected_st3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
      80  					       0xfffffff2, 0x0 };
      81  VECT_VAR_DECL(expected_st3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
      82  					       0x0, 0x0, 0x0, 0x0 };
      83  VECT_VAR_DECL(expected_st3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0,
      84  						 0x0, 0x0, 0x0, 0x0 };
      85  VECT_VAR_DECL(expected_st3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
      86  						 0xc1600000, 0x0 };
      87  
      88  /* Expected results for vst3, chunk 1.
             Chunk 1 is the part of result_bis_3 starting at element N, which
             TEST_ALL_EXTRA_CHUNKS(3, 1) copies into result.  Only the
             two-element (32,2) tables are non-zero here: their first element
             carries the third stored value, which did not fit in chunk 0.
             All other tables are expected to hold the zeros written by
             memset.  */
      89  VECT_VAR_DECL(expected_st3_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      90  					     0x0, 0x0, 0x0, 0x0 };
      91  VECT_VAR_DECL(expected_st3_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      92  VECT_VAR_DECL(expected_st3_1,int,32,2) [] = { 0xfffffff2, 0x0 };
      93  VECT_VAR_DECL(expected_st3_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      94  					      0x0, 0x0, 0x0, 0x0 };
      95  VECT_VAR_DECL(expected_st3_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      96  VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 };
      97  VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
      98  					      0x0, 0x0, 0x0, 0x0 };
      99  VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     100  VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     101  VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 };
     102  VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     103  					      0x0, 0x0, 0x0, 0x0 };
     104  VECT_VAR_DECL(expected_st3_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     105  VECT_VAR_DECL(expected_st3_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     106  					       0x0, 0x0, 0x0, 0x0 };
     107  VECT_VAR_DECL(expected_st3_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     108  VECT_VAR_DECL(expected_st3_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     109  					       0x0, 0x0, 0x0, 0x0 };
     110  VECT_VAR_DECL(expected_st3_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     111  						 0x0, 0x0, 0x0, 0x0 };
     112  VECT_VAR_DECL(expected_st3_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     113  
     114  /* Expected results for vst3, chunk 2.
             Chunk 2 is the part of result_bis_3 starting at element 2*N,
             which TEST_ALL_EXTRA_CHUNKS(3, 2) copies into result.  Every
             table is all 0x0: the three stored elements are covered by
             chunks 0 and 1, so this chunk is expected to still hold the
             zeros written by memset.  */
     115  VECT_VAR_DECL(expected_st3_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     116  					     0x0, 0x0, 0x0, 0x0 };
     117  VECT_VAR_DECL(expected_st3_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     118  VECT_VAR_DECL(expected_st3_2,int,32,2) [] = { 0x0, 0x0 };
     119  VECT_VAR_DECL(expected_st3_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     120  					      0x0, 0x0, 0x0, 0x0 };
     121  VECT_VAR_DECL(expected_st3_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     122  VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 };
     123  VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     124  					      0x0, 0x0, 0x0, 0x0 };
     125  VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     126  VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     127  VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 };
     128  VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     129  					      0x0, 0x0, 0x0, 0x0 };
     130  VECT_VAR_DECL(expected_st3_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     131  VECT_VAR_DECL(expected_st3_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     132  					       0x0, 0x0, 0x0, 0x0 };
     133  VECT_VAR_DECL(expected_st3_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     134  VECT_VAR_DECL(expected_st3_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     135  					       0x0, 0x0, 0x0, 0x0 };
     136  VECT_VAR_DECL(expected_st3_2,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     137  						 0x0, 0x0, 0x0, 0x0 };
     138  VECT_VAR_DECL(expected_st3_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     139  
     140  /* Expected results for vst4, chunk 0.
             Chunk 0 is the leading part of result_bis_4 that TEST_VSTX_LANE
             copies into result right after the vst4_lane/vst4q_lane store.
             Tables with four or more elements expect four non-zero leading
             elements, with any remaining elements 0x0 (the memset value).
             The two-element (32,2) tables only have room for the first two
             values; the other two are expected in chunk 1.
             NOTE(review): the CHECK calls that consume the vst4 tables are
             not shown alongside these tables; presumably they mirror the
             vst2/vst3 checks -- confirm.  */
     141  VECT_VAR_DECL(expected_st4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     142  					     0x0, 0x0, 0x0, 0x0 };
     143  VECT_VAR_DECL(expected_st4_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     144  VECT_VAR_DECL(expected_st4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
     145  VECT_VAR_DECL(expected_st4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     146  					      0x0, 0x0, 0x0, 0x0 };
     147  VECT_VAR_DECL(expected_st4_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     148  VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
     149  VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     150  					      0x0, 0x0, 0x0, 0x0 };
     151  VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     152  VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
     153  VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
     154  VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     155  					      0x0, 0x0, 0x0, 0x0 };
     156  VECT_VAR_DECL(expected_st4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
     157  					      0xfffffff2, 0xfffffff3 };
     158  VECT_VAR_DECL(expected_st4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     159  					       0x0, 0x0, 0x0, 0x0 };
     160  VECT_VAR_DECL(expected_st4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
     161  					       0xfffffff2, 0xfffffff3 };
     162  VECT_VAR_DECL(expected_st4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     163  					       0x0, 0x0, 0x0, 0x0 };
     164  VECT_VAR_DECL(expected_st4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
     165  						 0x0, 0x0, 0x0, 0x0 };
     166  VECT_VAR_DECL(expected_st4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
     167  						 0xc1600000, 0xc1500000 };
     168  
     169  /* Expected results for vst4, chunk 1.
             Chunk 1 is the part of result_bis_4 starting at element N (see
             TEST_EXTRA_CHUNK).  Only the two-element (32,2) tables are
             non-zero here: they carry the third and fourth stored values,
             which did not fit in chunk 0.  All other tables are expected to
             hold the zeros written by memset.
             NOTE(review): presumably selected with
             TEST_ALL_EXTRA_CHUNKS(4, 1), as for vst2/vst3 -- confirm against
             the vst4 checks.  */
     170  VECT_VAR_DECL(expected_st4_1,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     171  					     0x0, 0x0, 0x0, 0x0 };
     172  VECT_VAR_DECL(expected_st4_1,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     173  VECT_VAR_DECL(expected_st4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     174  VECT_VAR_DECL(expected_st4_1,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     175  					      0x0, 0x0, 0x0, 0x0 };
     176  VECT_VAR_DECL(expected_st4_1,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     177  VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     178  VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     179  					      0x0, 0x0, 0x0, 0x0 };
     180  VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     181  VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     182  VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
     183  VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     184  					      0x0, 0x0, 0x0, 0x0 };
     185  VECT_VAR_DECL(expected_st4_1,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     186  VECT_VAR_DECL(expected_st4_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     187  					       0x0, 0x0, 0x0, 0x0 };
     188  VECT_VAR_DECL(expected_st4_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     189  VECT_VAR_DECL(expected_st4_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     190  					       0x0, 0x0, 0x0, 0x0 };
     191  VECT_VAR_DECL(expected_st4_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     192  						 0x0, 0x0, 0x0, 0x0 };
     193  VECT_VAR_DECL(expected_st4_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     194  
     195  /* Expected results for vst4, chunk 2.
             Chunk 2 is the part of result_bis_4 starting at element 2*N (see
             TEST_EXTRA_CHUNK).  Every table is all 0x0: the four stored
             elements are covered by chunks 0 and 1, so this chunk is
             expected to still hold the zeros written by memset.
             NOTE(review): presumably selected with
             TEST_ALL_EXTRA_CHUNKS(4, 2) -- confirm against the vst4
             checks.  */
     196  VECT_VAR_DECL(expected_st4_2,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     197  					     0x0, 0x0, 0x0, 0x0 };
     198  VECT_VAR_DECL(expected_st4_2,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     199  VECT_VAR_DECL(expected_st4_2,int,32,2) [] = { 0x0, 0x0 };
     200  VECT_VAR_DECL(expected_st4_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     201  					      0x0, 0x0, 0x0, 0x0 };
     202  VECT_VAR_DECL(expected_st4_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     203  VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 };
     204  VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     205  					      0x0, 0x0, 0x0, 0x0 };
     206  VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     207  VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     208  VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 };
     209  VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     210  					      0x0, 0x0, 0x0, 0x0 };
     211  VECT_VAR_DECL(expected_st4_2,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     212  VECT_VAR_DECL(expected_st4_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     213  					       0x0, 0x0, 0x0, 0x0 };
     214  VECT_VAR_DECL(expected_st4_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     215  VECT_VAR_DECL(expected_st4_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     216  					       0x0, 0x0, 0x0, 0x0 };
     217  VECT_VAR_DECL(expected_st4_2,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     218  					       0x0, 0x0, 0x0, 0x0 };
     219  VECT_VAR_DECL(expected_st4_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     220  
     221  /* Expected results for vst4, chunk 3.
             Chunk 3 is the part of result_bis_4 starting at element 3*N (see
             TEST_EXTRA_CHUNK), i.e. the last N elements of that X*N-element
             buffer.  Every table is all 0x0: this chunk is expected to still
             hold the zeros written by memset.
             NOTE(review): presumably selected with
             TEST_ALL_EXTRA_CHUNKS(4, 3) -- confirm against the vst4
             checks.  */
     222  VECT_VAR_DECL(expected_st4_3,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     223  					     0x0, 0x0, 0x0, 0x0 };
     224  VECT_VAR_DECL(expected_st4_3,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     225  VECT_VAR_DECL(expected_st4_3,int,32,2) [] = { 0x0, 0x0 };
     226  VECT_VAR_DECL(expected_st4_3,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     227  					      0x0, 0x0, 0x0, 0x0 };
     228  VECT_VAR_DECL(expected_st4_3,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     229  VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 };
     230  VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
     231  					      0x0, 0x0, 0x0, 0x0 };
     232  VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     233  VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     234  VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 };
     235  VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     236  					      0x0, 0x0, 0x0, 0x0 };
     237  VECT_VAR_DECL(expected_st4_3,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     238  VECT_VAR_DECL(expected_st4_3,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     239  					       0x0, 0x0, 0x0, 0x0 };
     240  VECT_VAR_DECL(expected_st4_3,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     241  VECT_VAR_DECL(expected_st4_3,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     242  					       0x0, 0x0, 0x0, 0x0 };
     243  VECT_VAR_DECL(expected_st4_3,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
     244  						 0x0, 0x0, 0x0, 0x0 };
     245  VECT_VAR_DECL(expected_st4_3,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
     246  
     247  /* Declare additional input buffers as needed.  */
     248  /* Input buffers for vld2_lane.
             TEST_VSTX_LANE passes VECT_VAR(buffer_vld2_lane, T1, W, 2) as the
             source pointer of vld2_lane/vld2q_lane when X is 2, so each
             buffer is declared with 2 as its last argument, one per element
             type and width.  The initial contents come from
             VECT_VAR_DECL_INIT, which is not defined in this file.
             The 64-bit int/uint buffers are declared here although the
             TEST_VSTX_LANE invocations listed in TEST_ALL_VSTX_LANE_NO_FP16
             contain no 64-bit variant.  The float16 buffer exists only when
             one of the __ARM_FP16_FORMAT_* macros is defined.  */
     249  VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2);
     250  VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2);
     251  VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2);
     252  VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2);
     253  VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2);
     254  VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2);
     255  VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
     256  VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
     257  VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
     258  VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
     259  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     260  VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
     261  #endif
     262  VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
     263  
     264  /* Input buffers for vld3_lane.
             TEST_VSTX_LANE passes VECT_VAR(buffer_vld3_lane, T1, W, 3) as the
             source pointer of vld3_lane/vld3q_lane when X is 3, so each
             buffer is declared with 3 as its last argument.  The initial
             contents come from VECT_VAR_DECL_INIT, which is not defined in
             this file.  The float16 buffer exists only when one of the
             __ARM_FP16_FORMAT_* macros is defined.  */
     265  VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3);
     266  VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3);
     267  VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3);
     268  VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3);
     269  VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3);
     270  VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3);
     271  VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
     272  VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
     273  VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
     274  VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
     275  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     276  VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
     277  #endif
     278  VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
     279  
     280  /* Input buffers for vld4_lane.
             TEST_VSTX_LANE passes VECT_VAR(buffer_vld4_lane, T1, W, 4) as the
             source pointer of vld4_lane/vld4q_lane when X is 4, so each
             buffer is declared with 4 as its last argument.  The initial
             contents come from VECT_VAR_DECL_INIT, which is not defined in
             this file.  The float16 buffer exists only when one of the
             __ARM_FP16_FORMAT_* macros is defined.  */
     281  VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4);
     282  VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4);
     283  VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4);
     284  VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4);
     285  VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4);
     286  VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4);
     287  VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
     288  VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
     289  VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
     290  VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
     291  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     292  VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
     293  #endif
     294  VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
     295  
     296  void exec_vstX_lane (void)
     297  {
     298    /* In this case, input variables are arrays of vectors.  */
     299  #define DECL_VSTX_LANE(T1, W, N, X)					\
     300    VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
     301    VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
     302    VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
     303  
     304    /* We need to use a temporary result buffer (result_bis), because
     305       the one used for other tests is not large enough.  A subset of
     306       the result data is copied from result_bis to result, and it is
     307       this subset which is used to check the actual behavior.  The
     308       TEST_EXTRA_CHUNK macro below makes it possible to copy another
     309       chunk of data from result_bis to result.  */
     310    /* We also use another extra input buffer (buffer_src), which we
     311       fill with 0xAA, and which is used to load a vector from which we
     312       read a given lane.  */
     313  #define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)				 \
     314    memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				 \
     315  	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			 \
     316    memset (VECT_VAR(result_bis_##X, T1, W, N), 0,			 \
     317  	  sizeof(VECT_VAR(result_bis_##X, T1, W, N)));			 \
     318  									 \
     319    VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				 \
     320      vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		 \
     321  									 \
     322    VECT_ARRAY_VAR(vector, T1, W, N, X) =					 \
     323      /* Use dedicated init buffer, of size X.  */			 \
     324      vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	 \
     325  			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	 \
     326  			     L);					 \
     327    vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		 \
     328  			   VECT_ARRAY_VAR(vector, T1, W, N, X),		 \
     329  			   L);						 \
     330    memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
     331  	 sizeof(VECT_VAR(result, T1, W, N)));
     332  
     333    /* Overwrite "result" with the contents of "result_bis"[Y].  */
     334  #define TEST_EXTRA_CHUNK(T1, W, N, X, Y)		\
     335    memcpy(VECT_VAR(result, T1, W, N),			\
     336  	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
     337  	 sizeof(VECT_VAR(result, T1, W, N)));
     338  
     339    /* We need all variants in 64 bits, but there is no 64x2 variant,
     340       nor 128 bits vectors of int8/uint8/poly8.  */
     341  #define DECL_ALL_VSTX_LANE_NO_FP16(X)		\
     342    DECL_VSTX_LANE(int, 8, 8, X);			\
     343    DECL_VSTX_LANE(int, 16, 4, X);		\
     344    DECL_VSTX_LANE(int, 32, 2, X);		\
     345    DECL_VSTX_LANE(uint, 8, 8, X);		\
     346    DECL_VSTX_LANE(uint, 16, 4, X);		\
     347    DECL_VSTX_LANE(uint, 32, 2, X);		\
     348    DECL_VSTX_LANE(poly, 8, 8, X);		\
     349    DECL_VSTX_LANE(poly, 16, 4, X);		\
     350    DECL_VSTX_LANE(float, 32, 2, X);		\
     351    DECL_VSTX_LANE(int, 16, 8, X);		\
     352    DECL_VSTX_LANE(int, 32, 4, X);		\
     353    DECL_VSTX_LANE(uint, 16, 8, X);		\
     354    DECL_VSTX_LANE(uint, 32, 4, X);		\
     355    DECL_VSTX_LANE(poly, 16, 8, X);		\
     356    DECL_VSTX_LANE(float, 32, 4, X)
     357  
     358  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     359  #define DECL_ALL_VSTX_LANE(X)		\
     360    DECL_ALL_VSTX_LANE_NO_FP16(X);	\
     361    DECL_VSTX_LANE(float, 16, 4, X);	\
     362    DECL_VSTX_LANE(float, 16, 8, X)
     363  #else
     364  #define DECL_ALL_VSTX_LANE(X) DECL_ALL_VSTX_LANE_NO_FP16(X)
     365  #endif
     366  
     367  #define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L]
     368  
     369    /* Use the same lanes regardless of the size of the array (X), for
     370       simplicity.  */
     371  #define TEST_ALL_VSTX_LANE_NO_FP16(X)		\
     372    TEST_VSTX_LANE(, int, s, 8, 8, X, 7);		\
     373    TEST_VSTX_LANE(, int, s, 16, 4, X, 2);	\
     374    TEST_VSTX_LANE(, int, s, 32, 2, X, 0);	\
     375    TEST_VSTX_LANE(, float, f, 32, 2, X, 0);	\
     376    TEST_VSTX_LANE(, uint, u, 8, 8, X, 4);	\
     377    TEST_VSTX_LANE(, uint, u, 16, 4, X, 3);	\
     378    TEST_VSTX_LANE(, uint, u, 32, 2, X, 1);	\
     379    TEST_VSTX_LANE(, poly, p, 8, 8, X, 4);	\
     380    TEST_VSTX_LANE(, poly, p, 16, 4, X, 3);	\
     381    TEST_VSTX_LANE(q, int, s, 16, 8, X, 6);	\
     382    TEST_VSTX_LANE(q, int, s, 32, 4, X, 2);	\
     383    TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5);	\
     384    TEST_VSTX_LANE(q, uint, u, 32, 4, X, 0);	\
     385    TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5);	\
     386    TEST_VSTX_LANE(q, float, f, 32, 4, X, 2)
     387  
     388  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     389  #define TEST_ALL_VSTX_LANE(X)			\
     390    TEST_ALL_VSTX_LANE_NO_FP16(X);		\
     391    TEST_VSTX_LANE(, float, f, 16, 4, X, 2);	\
     392    TEST_VSTX_LANE(q, float, f, 16, 8, X, 6)
     393  #else
     394  #define TEST_ALL_VSTX_LANE(X) TEST_ALL_VSTX_LANE_NO_FP16(X)
     395  #endif
     396  
     397  #define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)	\
     398    TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
     399    TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
     400    TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
     401    TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
     402    TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
     403    TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
     404    TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
     405    TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
     406    TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
     407    TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
     408    TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
     409    TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
     410    TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
     411    TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
     412    TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
     413  
     414  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     415  #define TEST_ALL_EXTRA_CHUNKS(X,Y)		\
     416    TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y);		\
     417    TEST_EXTRA_CHUNK(float, 16, 4, X, Y);		\
     418    TEST_EXTRA_CHUNK(float, 16, 8, X, Y)
     419  #else
     420  #define TEST_ALL_EXTRA_CHUNKS(X,Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
     421  #endif
     422  
     423    /* Declare the temporary buffers / variables.  */
     424    DECL_ALL_VSTX_LANE(2);
     425    DECL_ALL_VSTX_LANE(3);
     426    DECL_ALL_VSTX_LANE(4);
     427  
     428    /* Define dummy input arrays, large enough for x4 vectors.  */
     429    DUMMY_ARRAY(buffer_src, int, 8, 8, 4);
     430    DUMMY_ARRAY(buffer_src, int, 16, 4, 4);
     431    DUMMY_ARRAY(buffer_src, int, 32, 2, 4);
     432    DUMMY_ARRAY(buffer_src, uint, 8, 8, 4);
     433    DUMMY_ARRAY(buffer_src, uint, 16, 4, 4);
     434    DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
     435    DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
     436    DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
     437  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     438    DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
     439  #endif
     440    DUMMY_ARRAY(buffer_src, float, 32, 2, 4);
     441    DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
     442    DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
     443    DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
     444    DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
     445    DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
     446  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     447    DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
     448  #endif
     449    DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
     450  
     451    /* Check vst2_lane/vst2q_lane.  */
     452    clean_results ();
     453  #define TEST_MSG "VST2_LANE/VST2Q_LANE"
     454    TEST_ALL_VSTX_LANE(2);
     455  
     456  #define CMT " (chunk 0)"
     457    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_0, CMT);
     458    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_0, CMT);
     459    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_0, CMT);
     460    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_0, CMT);
     461    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_0, CMT);
     462    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT);
     463    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT);
     464    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT);
     465    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT);
     466    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT);
     467    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT);
     468    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_0, CMT);
     469    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_0, CMT);
     470    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT);
     471    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_0, CMT);
     472  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     473    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_0, CMT);
     474    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_0, CMT);
     475  #endif
     476  
     477    TEST_ALL_EXTRA_CHUNKS(2, 1);
     478  #undef CMT
     479  #define CMT " (chunk 1)"
     480    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st2_1, CMT);
     481    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st2_1, CMT);
     482    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st2_1, CMT);
     483    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_1, CMT);
     484    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_1, CMT);
     485    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT);
     486    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT);
     487    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT);
     488    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT);
     489    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT);
     490    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT);
     491    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_1, CMT);
     492    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_1, CMT);
     493    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT);
     494    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_1, CMT);
     495  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     496    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_1, CMT);
     497    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_1, CMT);
     498  #endif
     499  
     500  
     501    /* Check vst3_lane/vst3q_lane.  */
     502    clean_results ();
     503  #undef TEST_MSG
     504  #define TEST_MSG "VST3_LANE/VST3Q_LANE"
     505    TEST_ALL_VSTX_LANE(3);
     506  
     507  #undef CMT
     508  #define CMT " (chunk 0)"
     509    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_0, CMT);
     510    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_0, CMT);
     511    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_0, CMT);
     512    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_0, CMT);
     513    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_0, CMT);
     514    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT);
     515    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT);
     516    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT);
     517    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT);
     518    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT);
     519    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT);
     520    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_0, CMT);
     521    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_0, CMT);
     522    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT);
     523    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_0, CMT);
     524  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     525    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_0, CMT);
     526    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_0, CMT);
     527  #endif
     528  
     529    TEST_ALL_EXTRA_CHUNKS(3, 1);
     530  
     531  #undef CMT
     532  #define CMT " (chunk 1)"
     533    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_1, CMT);
     534    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_1, CMT);
     535    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_1, CMT);
     536    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_1, CMT);
     537    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_1, CMT);
     538    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT);
     539    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT);
     540    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT);
     541    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT);
     542    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT);
     543    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT);
     544    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_1, CMT);
     545    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_1, CMT);
     546    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT);
     547    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_1, CMT);
     548  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     549    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_1, CMT);
     550    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_1, CMT);
     551  #endif
     552  
     553    TEST_ALL_EXTRA_CHUNKS(3, 2);
     554  
     555  #undef CMT
     556  #define CMT " (chunk 2)"
     557    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st3_2, CMT);
     558    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st3_2, CMT);
     559    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st3_2, CMT);
     560    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_2, CMT);
     561    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_2, CMT);
     562    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT);
     563    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT);
     564    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT);
     565    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT);
     566    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT);
     567    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT);
     568    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_2, CMT);
     569    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_2, CMT);
     570    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT);
     571    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_2, CMT);
     572  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     573    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_2, CMT);
     574    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_2, CMT);
     575  #endif
     576  
     577  
     578    /* Check vst4_lane/vst4q_lane.  */
     579    clean_results ();
     580  #undef TEST_MSG
     581  #define TEST_MSG "VST4_LANE/VST4Q_LANE"
     582    TEST_ALL_VSTX_LANE(4);
     583  
     584  #undef CMT
     585  #define CMT " (chunk 0)"
     586    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_0, CMT);
     587    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_0, CMT);
     588    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_0, CMT);
     589    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_0, CMT);
     590    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_0, CMT);
     591    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT);
     592    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT);
     593    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT);
     594    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT);
     595    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT);
     596    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT);
     597    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_0, CMT);
     598    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_0, CMT);
     599    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT);
     600    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_0, CMT);
     601  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     602    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_0, CMT);
     603    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_0, CMT);
     604  #endif
     605  
     606    TEST_ALL_EXTRA_CHUNKS(4, 1);
     607  
     608  #undef CMT
     609  #define CMT " (chunk 1)"
     610    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_1, CMT);
     611    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_1, CMT);
     612    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_1, CMT);
     613    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_1, CMT);
     614    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_1, CMT);
     615    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT);
     616    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT);
     617    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT);
     618    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT);
     619    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT);
     620    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT);
     621    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_1, CMT);
     622    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_1, CMT);
     623    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT);
     624    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_1, CMT);
     625  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     626    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_1, CMT);
     627    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_1, CMT);
     628  #endif
     629  
     630    TEST_ALL_EXTRA_CHUNKS(4, 2);
     631  
     632  #undef CMT
     633  #define CMT " (chunk 2)"
     634    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_2, CMT);
     635    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_2, CMT);
     636    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_2, CMT);
     637    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_2, CMT);
     638    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_2, CMT);
     639    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT);
     640    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT);
     641    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT);
     642    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT);
     643    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT);
     644    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT);
     645    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_2, CMT);
     646    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_2, CMT);
     647    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT);
     648    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_2, CMT);
     649  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     650    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_2, CMT);
     651    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_2, CMT);
     652  #endif
     653  
     654    TEST_ALL_EXTRA_CHUNKS(4, 3);
     655  
     656  #undef CMT
     657  #define CMT " (chunk 3)"
     658    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_st4_3, CMT);
     659    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_st4_3, CMT);
     660    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_st4_3, CMT);
     661    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_3, CMT);
     662    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_3, CMT);
     663    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT);
     664    CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT);
     665    CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT);
     666    CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT);
     667    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT);
     668    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT);
     669    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_3, CMT);
     670    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_3, CMT);
     671    CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT);
     672    CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_3, CMT);
     673  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     674    CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_3, CMT);
     675    CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_3, CMT);
     676  #endif
     677  }
     678  
     679  int main (void)
     680  {
     681    exec_vstX_lane ();
     682    return 0;
     683  }