(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
aarch64/
advsimd-intrinsics/
vldX.c
       1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  /* Expected results.  */
       6  
       7  /* vld2/chunk 0: reference values for the leading part of result_bis_2, which TEST_VLDX copies into 'result' after the vld2/vst2 round trip (presumably compared by CHECK_RESULTS_VLDX; the call is not visible in this part of the file).  */
       8  VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
       9  					      0xf4, 0xf5, 0xf6, 0xf7 };
      10  VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
      11  VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      12  VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfffffffffffffff0 };
      13  VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      14  					       0xf4, 0xf5, 0xf6, 0xf7 };
      15  VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
      16  VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      17  VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
      18  VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      19  					       0xf4, 0xf5, 0xf6, 0xf7 };
      20  VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
      21  VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
      22  VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
      23  VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      24  					       0xf4, 0xf5, 0xf6, 0xf7,
      25  					       0xf8, 0xf9, 0xfa, 0xfb,
      26  					       0xfc, 0xfd, 0xfe, 0xff };
      27  VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
      28  					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      29  VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
      30  					       0xfffffff2, 0xfffffff3 };
      31  VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      32  						0xf4, 0xf5, 0xf6, 0xf7,
      33  						0xf8, 0xf9, 0xfa, 0xfb,
      34  						0xfc, 0xfd, 0xfe, 0xff };
      35  VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
      36  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      37  VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
      38  						0xfffffff2, 0xfffffff3 };
      39  VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      40  						0xf4, 0xf5, 0xf6, 0xf7,
      41  						0xf8, 0xf9, 0xfa, 0xfb,
      42  						0xfc, 0xfd, 0xfe, 0xff };
      43  VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
      44  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      45  VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
      46  						  0xca00, 0xc980, 0xc900, 0xc880 };
      47  VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
      48  						  0xc1600000, 0xc1500000 };
      49  
      50  /* vld2/chunk 1: reference values for the part of result_bis_2 starting at element 1*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=2, Y=1 before checking); each list continues the sequence of the matching chunk 0 list.  */
      51  VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
      52  					      0xfc, 0xfd, 0xfe, 0xff };
      53  VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      54  VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
      55  VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfffffffffffffff1 };
      56  VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
      57  					       0xfc, 0xfd, 0xfe, 0xff };
      58  VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      59  VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
      60  VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
      61  VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
      62  					       0xfc, 0xfd, 0xfe, 0xff };
      63  VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
      64  VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
      65  VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
      66  VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
      67  					       0x4, 0x5, 0x6, 0x7,
      68  					       0x8, 0x9, 0xa, 0xb,
      69  					       0xc, 0xd, 0xe, 0xf };
      70  VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
      71  					       0xfffc, 0xfffd, 0xfffe, 0xffff };
      72  VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
      73  					       0xfffffff6, 0xfffffff7 };
      74  VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
      75  						0x4, 0x5, 0x6, 0x7,
      76  						0x8, 0x9, 0xa, 0xb,
      77  						0xc, 0xd, 0xe, 0xf };
      78  VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
      79  						0xfffc, 0xfffd, 0xfffe, 0xffff };
      80  VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
      81  						0xfffffff6, 0xfffffff7 };
      82  VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
      83  						0x4, 0x5, 0x6, 0x7,
      84  						0x8, 0x9, 0xa, 0xb,
      85  						0xc, 0xd, 0xe, 0xf };
      86  VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
      87  						0xfffc, 0xfffd, 0xfffe, 0xffff };
      88  VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
      89  						  0xc400, 0xc200, 0xc000, 0xbc00 };
      90  VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
      91  						  0xc1200000, 0xc1100000 };
      92  
      93  /* vld3/chunk 0: reference values for the leading part of result_bis_3, which TEST_VLDX copies into 'result' after the vld3/vst3 round trip (presumably compared by CHECK_RESULTS_VLDX; the call is not visible in this part of the file).  */
      94  VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
      95  					      0xf4, 0xf5, 0xf6, 0xf7 };
      96  VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
      97  VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
      98  VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0xfffffffffffffff0 };
      99  VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     100  					       0xf4, 0xf5, 0xf6, 0xf7 };
     101  VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     102  VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
     103  VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
     104  VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     105  					       0xf4, 0xf5, 0xf6, 0xf7 };
     106  VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     107  VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
     108  VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
     109  VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     110  					       0xf4, 0xf5, 0xf6, 0xf7,
     111  					       0xf8, 0xf9, 0xfa, 0xfb,
     112  					       0xfc, 0xfd, 0xfe, 0xff };
     113  VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     114  					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     115  VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
     116  					       0xfffffff2, 0xfffffff3 };
     117  VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     118  						0xf4, 0xf5, 0xf6, 0xf7,
     119  						0xf8, 0xf9, 0xfa, 0xfb,
     120  						0xfc, 0xfd, 0xfe, 0xff };
     121  VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     122  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     123  VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
     124  						0xfffffff2, 0xfffffff3 };
     125  VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     126  						0xf4, 0xf5, 0xf6, 0xf7,
     127  						0xf8, 0xf9, 0xfa, 0xfb,
     128  						0xfc, 0xfd, 0xfe, 0xff };
     129  VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     130  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     131  VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
     132  						  0xca00, 0xc980, 0xc900, 0xc880 };
     133  VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
     134  						  0xc1600000, 0xc1500000 };
     135  
     136  /* vld3/chunk 1: reference values for the part of result_bis_3 starting at element 1*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=3, Y=1 before checking); each list continues the sequence of the matching chunk 0 list.  */
     137  VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     138  					      0xfc, 0xfd, 0xfe, 0xff };
     139  VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     140  VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     141  VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0xfffffffffffffff1 };
     142  VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     143  					       0xfc, 0xfd, 0xfe, 0xff };
     144  VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     145  VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     146  VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
     147  VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     148  					       0xfc, 0xfd, 0xfe, 0xff };
     149  VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     150  VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
     151  VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
     152  VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     153  					       0x4, 0x5, 0x6, 0x7,
     154  					       0x8, 0x9, 0xa, 0xb,
     155  					       0xc, 0xd, 0xe, 0xf };
     156  VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     157  					       0xfffc, 0xfffd, 0xfffe, 0xffff };
     158  VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
     159  					       0xfffffff6, 0xfffffff7 };
     160  VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     161  						0x4, 0x5, 0x6, 0x7,
     162  						0x8, 0x9, 0xa, 0xb,
     163  						0xc, 0xd, 0xe, 0xf };
     164  VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     165  						0xfffc, 0xfffd, 0xfffe, 0xffff };
     166  VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
     167  						0xfffffff6, 0xfffffff7 };
     168  VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     169  						0x4, 0x5, 0x6, 0x7,
     170  						0x8, 0x9, 0xa, 0xb,
     171  						0xc, 0xd, 0xe, 0xf };
     172  VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     173  						0xfffc, 0xfffd, 0xfffe, 0xffff };
     174  VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
     175  						  0xc400, 0xc200, 0xc000, 0xbc00 };
     176  VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
     177  						  0xc1200000, 0xc1100000 };
     178  
     179  /* vld3/chunk 2: reference values for the part of result_bis_3 starting at element 2*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=3, Y=2 before checking); each list continues the sequence of the matching chunk 1 list.  */
     180  VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     181  					      0x4, 0x5, 0x6, 0x7 };
     182  VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff8, 0xfff9,
     183  					       0xfffa, 0xfffb };
     184  VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
     185  VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0xfffffffffffffff2 };
     186  VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     187  					       0x4, 0x5, 0x6, 0x7 };
     188  VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xfff8, 0xfff9,
     189  						0xfffa, 0xfffb };
     190  VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
     191  VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0xfffffffffffffff2 };
     192  VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     193  					       0x4, 0x5, 0x6, 0x7 };
     194  VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
     195  						0xfffa, 0xfffb };
     196  VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
     197  VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
     198  VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     199  					       0x14, 0x15, 0x16, 0x17,
     200  					       0x18, 0x19, 0x1a, 0x1b,
     201  					       0x1c, 0x1d, 0x1e, 0x1f };
     202  VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     203  					       0x4, 0x5, 0x6, 0x7 };
     204  VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
     205  					       0xfffffffa, 0xfffffffb };
     206  VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     207  						0x14, 0x15, 0x16, 0x17,
     208  						0x18, 0x19, 0x1a, 0x1b,
     209  						0x1c, 0x1d, 0x1e, 0x1f };
     210  VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     211  						0x4, 0x5, 0x6, 0x7 };
     212  VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
     213  						0xfffffffa, 0xfffffffb };
     214  VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     215  						0x14, 0x15, 0x16, 0x17,
     216  						0x18, 0x19, 0x1a, 0x1b,
     217  						0x1c, 0x1d, 0x1e, 0x1f };
     218  VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     219  						0x4, 0x5, 0x6, 0x7 };
     220  VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
     221  						  0x4400, 0x4500, 0x4600, 0x4700 };
     222  VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
     223  						  0xc0c00000, 0xc0a00000 };
     224  
     225  /* vld4/chunk 0: reference values for the leading part of result_bis_4, which TEST_VLDX copies into 'result' after the vld4/vst4 round trip (presumably compared by CHECK_RESULTS_VLDX; the call is not visible in this part of the file).  */
     226  VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     227  					      0xf4, 0xf5, 0xf6, 0xf7 };
     228  VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xfff0, 0xfff1,
     229  					       0xfff2, 0xfff3 };
     230  VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
     231  VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0xfffffffffffffff0 };
     232  VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     233  					       0xf4, 0xf5, 0xf6, 0xf7 };
     234  VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xfff0, 0xfff1,
     235  						0xfff2, 0xfff3 };
     236  VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
     237  VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
     238  VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     239  					       0xf4, 0xf5, 0xf6, 0xf7 };
     240  VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
     241  VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
     242  VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
     243  VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     244  					       0xf4, 0xf5, 0xf6, 0xf7,
     245  					       0xf8, 0xf9, 0xfa, 0xfb,
     246  					       0xfc, 0xfd, 0xfe, 0xff };
     247  VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     248  					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     249  VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
     250  					       0xfffffff2, 0xfffffff3 };
     251  VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     252  						0xf4, 0xf5, 0xf6, 0xf7,
     253  						0xf8, 0xf9, 0xfa, 0xfb,
     254  						0xfc, 0xfd, 0xfe, 0xff };
     255  VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     256  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     257  VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
     258  						0xfffffff2, 0xfffffff3 };
     259  VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
     260  						0xf4, 0xf5, 0xf6, 0xf7,
     261  						0xf8, 0xf9, 0xfa, 0xfb,
     262  						0xfc, 0xfd, 0xfe, 0xff };
     263  VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
     264  						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     265  VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
     266  						  0xca00, 0xc980, 0xc900, 0xc880 };
     267  VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
     268  						  0xc1600000, 0xc1500000 };
     269  
     270  /* vld4/chunk 1: reference values for the part of result_bis_4 starting at element 1*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=4, Y=1 before checking); each list continues the sequence of the matching chunk 0 list.  */
     271  VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     272  					      0xfc, 0xfd, 0xfe, 0xff };
     273  VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     274  VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     275  VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0xfffffffffffffff1 };
     276  VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     277  					       0xfc, 0xfd, 0xfe, 0xff };
     278  VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     279  VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
     280  VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
     281  VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
     282  					       0xfc, 0xfd, 0xfe, 0xff };
     283  VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
     284  VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
     285  VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
     286  VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     287  					       0x4, 0x5, 0x6, 0x7,
     288  					       0x8, 0x9, 0xa, 0xb,
     289  					       0xc, 0xd, 0xe, 0xf };
     290  VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     291  					       0xfffc, 0xfffd, 0xfffe, 0xffff };
     292  VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
     293  					       0xfffffff6, 0xfffffff7 };
     294  VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     295  						0x4, 0x5, 0x6, 0x7,
     296  						0x8, 0x9, 0xa, 0xb,
     297  						0xc, 0xd, 0xe, 0xf };
     298  VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     299  						0xfffc, 0xfffd, 0xfffe, 0xffff };
     300  VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
     301  						0xfffffff6, 0xfffffff7 };
     302  VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
     303  						0x4, 0x5, 0x6, 0x7,
     304  						0x8, 0x9, 0xa, 0xb,
     305  						0xc, 0xd, 0xe, 0xf };
     306  VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
     307  						0xfffc, 0xfffd, 0xfffe, 0xffff };
     308  VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500,
     309  						  0xc400, 0xc200, 0xc000, 0xbc00 };
     310  VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
     311  						  0xc1200000, 0xc1100000 };
     312  
     313  /* vld4/chunk 2: reference values for the part of result_bis_4 starting at element 2*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=4, Y=2 before checking); each list continues the sequence of the matching chunk 1 list.  */
     314  VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     315  					      0x4, 0x5, 0x6, 0x7 };
     316  VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
     317  VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
     318  VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0xfffffffffffffff2 };
     319  VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     320  					       0x4, 0x5, 0x6, 0x7 };
     321  VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
     322  VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
     323  VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
     324  VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
     325  					       0x4, 0x5, 0x6, 0x7 };
     326  VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
     327  VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
     328  VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
     329  VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     330  					       0x14, 0x15, 0x16, 0x17,
     331  					       0x18, 0x19, 0x1a, 0x1b,
     332  					       0x1c, 0x1d, 0x1e, 0x1f };
     333  VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     334  					       0x4, 0x5, 0x6, 0x7 };
     335  VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
     336  					       0xfffffffa, 0xfffffffb };
     337  VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     338  						0x14, 0x15, 0x16, 0x17,
     339  						0x18, 0x19, 0x1a, 0x1b,
     340  						0x1c, 0x1d, 0x1e, 0x1f };
     341  VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     342  						0x4, 0x5, 0x6, 0x7 };
     343  VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
     344  						0xfffffffa, 0xfffffffb };
     345  VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
     346  						0x14, 0x15, 0x16, 0x17,
     347  						0x18, 0x19, 0x1a, 0x1b,
     348  						0x1c, 0x1d, 0x1e, 0x1f };
     349  VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
     350  						0x4, 0x5, 0x6, 0x7 };
     351  VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200,
     352  						  0x4400, 0x4500, 0x4600, 0x4700 };
     353  VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
     354  						  0xc0c00000, 0xc0a00000 };
     355  
     356  /* vld4/chunk 3: reference values for the part of result_bis_4 starting at element 3*N (presumably moved into 'result' by TEST_EXTRA_CHUNK with X=4, Y=3 before checking); each list continues the sequence of the matching chunk 2 list.  */
     357  VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
     358  					      0xc, 0xd, 0xe, 0xf };
     359  VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
     360  VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xfffffff6, 0xfffffff7 };
     361  VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0xfffffffffffffff3 };
     362  VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
     363  					       0xc, 0xd, 0xe, 0xf };
     364  VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
     365  VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 };
     366  VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
     367  VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
     368  					       0xc, 0xd, 0xe, 0xf };
     369  VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
     370  VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
     371  VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
     372  VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
     373  					       0x24, 0x25, 0x26, 0x27,
     374  					       0x28, 0x29, 0x2a, 0x2b,
     375  					       0x2c, 0x2d, 0x2e, 0x2f };
     376  VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
     377  					       0xc, 0xd, 0xe, 0xf };
     378  VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd,
     379  					       0xfffffffe, 0xffffffff };
     380  VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
     381  						0x24, 0x25, 0x26, 0x27,
     382  						0x28, 0x29, 0x2a, 0x2b,
     383  						0x2c, 0x2d, 0x2e, 0x2f };
     384  VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
     385  						0xc, 0xd, 0xe, 0xf };
     386  VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd,
     387  						0xfffffffe, 0xffffffff };
     388  VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
     389  						0x24, 0x25, 0x26, 0x27,
     390  						0x28, 0x29, 0x2a, 0x2b,
     391  						0x2c, 0x2d, 0x2e, 0x2f };
     392  VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
     393  						0xc, 0xd, 0xe, 0xf };
     394  VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980,
     395  						  0x4a00, 0x4a80, 0x4b00, 0x4b80 };
     396  VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
     397  						  0xc0000000, 0xbf800000 };
     398  
     399  void exec_vldX (void)
     400  {
     401    /* In this case, input variables are arrays of vectors.  */
     402  #define DECL_VLDX(T1, W, N, X)						\
     403    VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
     404    VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
     405  
     406    /* We need to use a temporary result buffer (result_bis), because
     407       the one used for other tests is not large enough. A subset of the
     408       result data is moved from result_bis to result, and it is this
     409       subset which is used to check the actual behavior. The next
     410       macro makes it possible to move another chunk of data from
     411       result_bis to result.  */
     412  #define TEST_VLDX(Q, T1, T2, W, N, X)					\
     413    VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
     414      /* Use dedicated init buffer, of size X */				\
     415      vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X));	\
     416    vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
     417  		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
     418    memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
     419  	 sizeof(VECT_VAR(result, T1, W, N)));
     420  
     421    /* Overwrite "result" with the contents of "result_bis"[Y].  */
     422  #define TEST_EXTRA_CHUNK(T1, W, N, X,Y)			\
     423    memcpy(VECT_VAR(result, T1, W, N),			\
     424  	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
     425  	 sizeof(VECT_VAR(result, T1, W, N)));
     426  
     427    /* We need all variants in 64 bits, but there is no 64x2 variant.  */
     428  #define DECL_ALL_VLDX_NO_FP16(X)		\
     429    DECL_VLDX(int, 8, 8, X);			\
     430    DECL_VLDX(int, 16, 4, X);			\
     431    DECL_VLDX(int, 32, 2, X);			\
     432    DECL_VLDX(int, 64, 1, X);			\
     433    DECL_VLDX(uint, 8, 8, X);			\
     434    DECL_VLDX(uint, 16, 4, X);			\
     435    DECL_VLDX(uint, 32, 2, X);			\
     436    DECL_VLDX(uint, 64, 1, X);			\
     437    DECL_VLDX(poly, 8, 8, X);			\
     438    DECL_VLDX(poly, 16, 4, X);			\
     439    DECL_VLDX(float, 32, 2, X);			\
     440    DECL_VLDX(int, 8, 16, X);			\
     441    DECL_VLDX(int, 16, 8, X);			\
     442    DECL_VLDX(int, 32, 4, X);			\
     443    DECL_VLDX(uint, 8, 16, X);			\
     444    DECL_VLDX(uint, 16, 8, X);			\
     445    DECL_VLDX(uint, 32, 4, X);			\
     446    DECL_VLDX(poly, 8, 16, X);			\
     447    DECL_VLDX(poly, 16, 8, X);			\
     448    DECL_VLDX(float, 32, 4, X)
     449  
     450  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     451  #define DECL_ALL_VLDX(X)	\
     452    DECL_ALL_VLDX_NO_FP16(X);	\
     453    DECL_VLDX(float, 16, 4, X);	\
     454    DECL_VLDX(float, 16, 8, X)
     455  #else
     456  #define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
     457  #endif
     458  
     459  #define TEST_ALL_VLDX_NO_FP16(X)		\
     460    TEST_VLDX(, int, s, 8, 8, X);			\
     461    TEST_VLDX(, int, s, 16, 4, X);		\
     462    TEST_VLDX(, int, s, 32, 2, X);		\
     463    TEST_VLDX(, int, s, 64, 1, X);		\
     464    TEST_VLDX(, uint, u, 8, 8, X);		\
     465    TEST_VLDX(, uint, u, 16, 4, X);		\
     466    TEST_VLDX(, uint, u, 32, 2, X);		\
     467    TEST_VLDX(, uint, u, 64, 1, X);		\
     468    TEST_VLDX(, poly, p, 8, 8, X);		\
     469    TEST_VLDX(, poly, p, 16, 4, X);		\
     470    TEST_VLDX(, float, f, 32, 2, X);		\
     471    TEST_VLDX(q, int, s, 8, 16, X);		\
     472    TEST_VLDX(q, int, s, 16, 8, X);		\
     473    TEST_VLDX(q, int, s, 32, 4, X);		\
     474    TEST_VLDX(q, uint, u, 8, 16, X);		\
     475    TEST_VLDX(q, uint, u, 16, 8, X);		\
     476    TEST_VLDX(q, uint, u, 32, 4, X);		\
     477    TEST_VLDX(q, poly, p, 8, 16, X);		\
     478    TEST_VLDX(q, poly, p, 16, 8, X);		\
     479    TEST_VLDX(q, float, f, 32, 4, X)
     480  
     481  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     482  #define TEST_ALL_VLDX(X)		\
     483    TEST_ALL_VLDX_NO_FP16(X);		\
     484    TEST_VLDX(, float, f, 16, 4, X);	\
     485    TEST_VLDX(q, float, f, 16, 8, X)
     486  #else
     487  #define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
     488  #endif
     489  
     490  #define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)	\
     491    TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
     492    TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
     493    TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
     494    TEST_EXTRA_CHUNK(int, 64, 1, X, Y);		\
     495    TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
     496    TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
     497    TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
     498    TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
     499    TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
     500    TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
     501    TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
     502    TEST_EXTRA_CHUNK(int, 8, 16, X, Y);		\
     503    TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
     504    TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
     505    TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);		\
     506    TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
     507    TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
     508    TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);		\
     509    TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
     510    TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
     511  
     512  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     513  #define TEST_ALL_EXTRA_CHUNKS(X, Y)		\
     514    TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)		\
     515    TEST_EXTRA_CHUNK(float, 16, 4, X, Y);		\
     516    TEST_EXTRA_CHUNK(float, 16, 8, X, Y);
     517  #else
     518  #define TEST_ALL_EXTRA_CHUNKS(X, Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)
     519  #endif
     520  
     521    /* vldX supports all vector types except [u]int64x2.
               CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment) invokes
               the check helpers (defined elsewhere) once per supported type
               other than float16, forwarding test_name, EXPECTED and comment
               unchanged.  The 64-bit wide vectors (width x lanes = 64) come
               first, then the 128-bit ones.  Integer types go through CHECK,
               polynomial types through CHECK_POLY and float32 through
               CHECK_FP; the PRIx<N> argument matches the element width.
               The last invocation has no trailing ';', so the user of this
               macro has to supply it.  */
     522  #define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment)		\
     523      CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
     524      CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
     525      CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);		\
     526      CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);		\
     527      CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);		\
     528      CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);		\
     529      CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
     530      CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
     531      CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);	\
     532      CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);	\
     533      CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
     534  									\
     535      CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
     536      CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
     537      CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
     538      CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);		\
     539      CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
     540      CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
     541      CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);	\
     542      CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);	\
     543      CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
     544  
     545  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
          /* CHECK_RESULTS_VLDX(test_name, EXPECTED, comment): run all the
             non-FP16 checks, then the two float16 checks (4 and 8 lanes),
             grouped in a single braced block.  The float16 checks are only
             compiled when one of the __ARM_FP16_FORMAT_* macros is defined.  */
     546  #define CHECK_RESULTS_VLDX(test_name,EXPECTED,comment)			\
     547    {									\
     548      CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment);		\
     549      CHECK_FP(test_name, float, 16, 4, PRIx16, EXPECTED, comment);	\
     550      CHECK_FP(test_name, float, 16, 8, PRIx16, EXPECTED, comment);	\
     551    }
     552  #else
          /* No float16 support: the braced block only holds the non-FP16
             checks.  */
     553  #define CHECK_RESULTS_VLDX(test_name, EXPECTED, comment)		\
     554    { CHECK_RESULTS_VLDX_NO_FP16(test_name, EXPECTED, comment); }
     555  #endif
     556  
     557    DECL_ALL_VLDX(2);
     558    DECL_ALL_VLDX(3);
     559    DECL_ALL_VLDX(4);
     560  
     561    /* Special input buffers of suitable size are needed for vld2/vld3/vld4.  */
     562    /* Input buffers for vld2, 1 of each size */
     563    VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
     564    PAD(buffer_vld2_pad, int, 8, 8);
     565    VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
     566    PAD(buffer_vld2_pad, int, 16, 4);
     567    VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
     568    PAD(buffer_vld2_pad, int, 32, 2);
     569    VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
     570    PAD(buffer_vld2_pad, int, 64, 1);
     571    VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
     572    PAD(buffer_vld2_pad, uint, 8, 8);
     573    VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
     574    PAD(buffer_vld2_pad, uint, 16, 4);
     575    VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
     576    PAD(buffer_vld2_pad, uint, 32, 2);
     577    VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
     578    PAD(buffer_vld2_pad, uint, 64, 1);
     579    VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8);
     580    PAD(buffer_vld2_pad, poly, 8, 8);
     581    VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
     582    PAD(buffer_vld2_pad, poly, 16, 4);
     583  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     584    VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
     585    PAD(buffer_vld2_pad, float, 16, 4);
     586  #endif
     587    VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
     588    PAD(buffer_vld2_pad, float, 32, 2);
     589  
     590    VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
     591    PAD(buffer_vld2_pad, int, 8, 16);
     592    VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
     593    PAD(buffer_vld2_pad, int, 16, 8);
     594    VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
     595    PAD(buffer_vld2_pad, int, 32, 4);
     596    VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
     597    PAD(buffer_vld2_pad, int, 64, 2);
     598    VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
     599    PAD(buffer_vld2_pad, uint, 8, 16);
     600    VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
     601    PAD(buffer_vld2_pad, uint, 16, 8);
     602    VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
     603    PAD(buffer_vld2_pad, uint, 32, 4);
     604    VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
     605    PAD(buffer_vld2_pad, uint, 64, 2);
     606    VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16);
     607    PAD(buffer_vld2_pad, poly, 8, 16);
     608    VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
     609    PAD(buffer_vld2_pad, poly, 16, 8);
     610  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     611    VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
     612    PAD(buffer_vld2_pad, float, 16, 8);
     613  #endif
     614    VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
     615    PAD(buffer_vld2_pad, float, 32, 4);
     616  
     617    /* Input buffers for vld3, 1 of each size */
     618    VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
     619    PAD(buffer_vld3_pad, int, 8, 8);
     620    VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
     621    PAD(buffer_vld3_pad, int, 16, 4);
     622    VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
     623    PAD(buffer_vld3_pad, int, 32, 2);
     624    VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
     625    PAD(buffer_vld3_pad, int, 64, 1);
     626    VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
     627    PAD(buffer_vld3_pad, uint, 8, 8);
     628    VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
     629    PAD(buffer_vld3_pad, uint, 16, 4);
     630    VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
     631    PAD(buffer_vld3_pad, uint, 32, 2);
     632    VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
     633    PAD(buffer_vld3_pad, uint, 64, 1);
     634    VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8);
     635    PAD(buffer_vld3_pad, poly, 8, 8);
     636    VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
     637    PAD(buffer_vld3_pad, poly, 16, 4);
     638  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     639    VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
     640    PAD(buffer_vld3_pad, float, 16, 4);
     641  #endif
     642    VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
     643    PAD(buffer_vld3_pad, float, 32, 2);
     644  
     645    VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
     646    PAD(buffer_vld3_pad, int, 8, 16);
     647    VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
     648    PAD(buffer_vld3_pad, int, 16, 8);
     649    VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
     650    PAD(buffer_vld3_pad, int, 32, 4);
     651    VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
     652    PAD(buffer_vld3_pad, int, 64, 2);
     653    VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
     654    PAD(buffer_vld3_pad, uint, 8, 16);
     655    VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
     656    PAD(buffer_vld3_pad, uint, 16, 8);
     657    VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
     658    PAD(buffer_vld3_pad, uint, 32, 4);
     659    VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
     660    PAD(buffer_vld3_pad, uint, 64, 2);
     661    VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16);
     662    PAD(buffer_vld3_pad, poly, 8, 16);
     663    VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
     664    PAD(buffer_vld3_pad, poly, 16, 8);
     665  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     666    VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
     667    PAD(buffer_vld3_pad, float, 16, 8);
     668  #endif
     669    VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
     670    PAD(buffer_vld3_pad, float, 32, 4);
     671  
     672    /* Input buffers for vld4, 1 of each size */
     673    VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
     674    PAD(buffer_vld4_pad, int, 8, 8);
     675    VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
     676    PAD(buffer_vld4_pad, int, 16, 4);
     677    VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
     678    PAD(buffer_vld4_pad, int, 32, 2);
     679    VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
     680    PAD(buffer_vld4_pad, int, 64, 1);
     681    VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
     682    PAD(buffer_vld4_pad, uint, 8, 8);
     683    VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
     684    PAD(buffer_vld4_pad, uint, 16, 4);
     685    VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
     686    PAD(buffer_vld4_pad, uint, 32, 2);
     687    VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
     688    PAD(buffer_vld4_pad, uint, 64, 1);
     689    VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8);
     690    PAD(buffer_vld4_pad, poly, 8, 8);
     691    VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
     692    PAD(buffer_vld4_pad, poly, 16, 4);
     693  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     694    VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
     695    PAD(buffer_vld4_pad, float, 16, 4);
     696  #endif
     697    VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
     698    PAD(buffer_vld4_pad, float, 32, 2);
     699  
     700    VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
     701    PAD(buffer_vld4_pad, int, 8, 16);
     702    VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
     703    PAD(buffer_vld4_pad, int, 16, 8);
     704    VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
     705    PAD(buffer_vld4_pad, int, 32, 4);
     706    VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
     707    PAD(buffer_vld4_pad, int, 64, 2);
     708    VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
     709    PAD(buffer_vld4_pad, uint, 8, 16);
     710    VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
     711    PAD(buffer_vld4_pad, uint, 16, 8);
     712    VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
     713    PAD(buffer_vld4_pad, uint, 32, 4);
     714    VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
     715    PAD(buffer_vld4_pad, uint, 64, 2);
     716    VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16);
     717    PAD(buffer_vld4_pad, poly, 8, 16);
     718    VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
     719    PAD(buffer_vld4_pad, poly, 16, 8);
     720  #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
     721    VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
     722    PAD(buffer_vld4_pad, float, 16, 8);
     723  #endif
     724    VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
     725    PAD(buffer_vld4_pad, float, 32, 4);
     726  
     727    /* Check vld2/vld2q.  */
     728    clean_results ();
     729  #define TEST_MSG "VLD2/VLD2Q"
     730    TEST_ALL_VLDX(2);
     731    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_0, "chunk 0");
     732  
     733    TEST_ALL_EXTRA_CHUNKS(2, 1);
     734    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld2_1, "chunk 1");
     735  
     736    /* Check vld3/vld3q.  */
     737    clean_results ();
     738  #undef TEST_MSG
     739  #define TEST_MSG "VLD3/VLD3Q"
     740    TEST_ALL_VLDX(3);
     741    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_0, "chunk 0");
     742  
     743    TEST_ALL_EXTRA_CHUNKS(3, 1);
     744    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_1, "chunk 1");
     745  
     746    TEST_ALL_EXTRA_CHUNKS(3, 2);
     747    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld3_2, "chunk 2");
     748  
     749    /* Check vld4/vld4q.  */
     750    clean_results ();
     751  #undef TEST_MSG
     752  #define TEST_MSG "VLD4/VLD4Q"
     753    TEST_ALL_VLDX(4);
     754    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_0, "chunk 0");
     755  
     756    TEST_ALL_EXTRA_CHUNKS(4, 1);
     757    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_1, "chunk 1");
     758  
     759    TEST_ALL_EXTRA_CHUNKS(4, 2);
     760    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_2, "chunk 2");
     761  
     762    TEST_ALL_EXTRA_CHUNKS(4, 3);
     763    CHECK_RESULTS_VLDX (TEST_MSG, expected_vld4_3, "chunk 3");
     764  }
     765  
     766  int main (void)
     767  {
            /* Run the vld2/vld3/vld4 test sequence, then return 0 once
               exec_vldX has returned; no result of exec_vldX is used here.
               NOTE(review): failures are presumably reported by the CHECK*
               helpers rather than through the exit status -- confirm.  */
     768    exec_vldX ();
     769    return 0;
     770  }