1  /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
       2  
       3  #include "test_sve_acle.h"
       4  
/* Load a three-vector float32 tuple directly from the base pointer x0,
   using both the svld3_f32 and the unsuffixed svld3 spellings.  The
   expected code is a single LD3W with no offset.  */
/*
** ld3_f32_base:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_f32_base, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0),
	   z0 = svld3 (p0, x0))
      13  
/* Load from x0 plus a run-time element index held in x1.  The expected
   code uses the register-offset form of LD3W, with x1 scaled by
   "lsl 2" rather than being added to x0 separately.  */
/*
** ld3_f32_index:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\]
**	ret
*/
TEST_LOAD (ld3_f32_index, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + x1),
	   z0 = svld3 (p0, x0 + x1))
      22  
/* Load from x0 advanced by svcntw () elements.  The expected code
   adjusts x0 with INCB and then uses an unindexed LD3W, rather than an
   immediate offset in the load itself.
   Moving the constant into a register would also be OK.  */
/*
** ld3_f32_1:
**	incb	x0
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_f32_1, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + svcntw ()),
	   z0 = svld3 (p0, x0 + svcntw ()))
      33  
/* Load from x0 advanced by svcntw () * 2 elements.  The expected code
   adjusts x0 with "INCB ..., mul #2" and then uses an unindexed LD3W.
   Moving the constant into a register would also be OK.  */
/*
** ld3_f32_2:
**	incb	x0, all, mul #2
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_f32_2, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + svcntw () * 2),
	   z0 = svld3 (p0, x0 + svcntw () * 2))
      44  
/* Load from x0 advanced by svcntw () * 3 elements.  The expected code
   folds the offset into the load as the immediate "#3, mul vl", with no
   separate address calculation.  */
/*
** ld3_f32_3:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\]
**	ret
*/
TEST_LOAD (ld3_f32_3, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + svcntw () * 3),
	   z0 = svld3 (p0, x0 + svcntw () * 3))
      53  
/* Load from x0 advanced by svcntw () * 21 elements.  The offset is
   still expected to be encoded as the immediate "#21, mul vl".
   NOTE(review): presumably 21 is the largest positive immediate that
   LD3W accepts -- confirm against the architecture reference.  */
/*
** ld3_f32_21:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\]
**	ret
*/
TEST_LOAD (ld3_f32_21, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + svcntw () * 21),
	   z0 = svld3 (p0, x0 + svcntw () * 21))
      62  
/* Load from x0 advanced by svcntw () * 24 elements.  Unlike the
   ld3_f32_21 case, no immediate offset is expected here: the address is
   formed in a scratch register with ADDVL and the load goes through
   that register.
   NOTE(review): presumably 24 is just outside the immediate range of
   LD3W -- confirm against the architecture reference.  */
/*
** ld3_f32_24:
**	addvl	(x[0-9]+), x0, #24
**	ld3w	{z0\.s - z2\.s}, p0/z, \[\1\]
**	ret
*/
TEST_LOAD (ld3_f32_24, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 + svcntw () * 24),
	   z0 = svld3 (p0, x0 + svcntw () * 24))
      72  
/* Load from x0 moved back by svcntw () elements.  The expected code
   adjusts x0 with DECB and then uses an unindexed LD3W.
   Moving the constant into a register would also be OK.  */
/*
** ld3_f32_m1:
**	decb	x0
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_f32_m1, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 - svcntw ()),
	   z0 = svld3 (p0, x0 - svcntw ()))
      83  
/* Load from x0 moved back by svcntw () * 2 elements.  The expected code
   adjusts x0 with "DECB ..., mul #2" and then uses an unindexed LD3W.
   Moving the constant into a register would also be OK.  */
/*
** ld3_f32_m2:
**	decb	x0, all, mul #2
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_f32_m2, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 - svcntw () * 2),
	   z0 = svld3 (p0, x0 - svcntw () * 2))
      94  
/* Load from x0 moved back by svcntw () * 3 elements.  The expected code
   folds the offset into the load as the immediate "#-3, mul vl".  */
/*
** ld3_f32_m3:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\]
**	ret
*/
TEST_LOAD (ld3_f32_m3, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 - svcntw () * 3),
	   z0 = svld3 (p0, x0 - svcntw () * 3))
     103  
/* Load from x0 moved back by svcntw () * 24 elements.  The offset is
   still expected to be encoded as the immediate "#-24, mul vl".
   NOTE(review): presumably -24 is the most negative immediate that
   LD3W accepts -- confirm against the architecture reference.  */
/*
** ld3_f32_m24:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\]
**	ret
*/
TEST_LOAD (ld3_f32_m24, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 - svcntw () * 24),
	   z0 = svld3 (p0, x0 - svcntw () * 24))
     112  
/* Load from x0 moved back by svcntw () * 27 elements.  Unlike the
   ld3_f32_m24 case, no immediate offset is expected here: the address
   is formed in a scratch register with ADDVL and the load goes through
   that register.
   NOTE(review): presumably -27 is just outside the immediate range of
   LD3W -- confirm against the architecture reference.  */
/*
** ld3_f32_m27:
**	addvl	(x[0-9]+), x0, #-27
**	ld3w	{z0\.s - z2\.s}, p0/z, \[\1\]
**	ret
*/
TEST_LOAD (ld3_f32_m27, svfloat32x3_t, float32_t,
	   z0 = svld3_f32 (p0, x0 - svcntw () * 27),
	   z0 = svld3 (p0, x0 - svcntw () * 27))
     122  
/* The _vnum form with a constant vector number of 0, using both the
   svld3_vnum_f32 and the unsuffixed svld3_vnum spellings.  The expected
   code is the same unindexed LD3W as for ld3_f32_base.  */
/*
** ld3_vnum_f32_0:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_0, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 0),
	   z0 = svld3_vnum (p0, x0, 0))
     131  
/* The _vnum form with a constant vector number of 1.  The expected code
   matches ld3_f32_1: x0 is adjusted with INCB and the load is
   unindexed.
   Moving the constant into a register would also be OK.  */
/*
** ld3_vnum_f32_1:
**	incb	x0
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_1, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 1),
	   z0 = svld3_vnum (p0, x0, 1))
     142  
/* The _vnum form with a constant vector number of 2.  The expected code
   matches ld3_f32_2: x0 is adjusted with "INCB ..., mul #2" and the
   load is unindexed.
   Moving the constant into a register would also be OK.  */
/*
** ld3_vnum_f32_2:
**	incb	x0, all, mul #2
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_2, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 2),
	   z0 = svld3_vnum (p0, x0, 2))
     153  
/* The _vnum form with a constant vector number of 3.  The expected code
   encodes the offset in the load as the immediate "#3, mul vl".  */
/*
** ld3_vnum_f32_3:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_3, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 3),
	   z0 = svld3_vnum (p0, x0, 3))
     162  
/* The _vnum form with a constant vector number of 21.  The offset is
   still expected to be encoded as the immediate "#21, mul vl".
   NOTE(review): presumably 21 is the largest positive immediate that
   LD3W accepts -- confirm against the architecture reference.  */
/*
** ld3_vnum_f32_21:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_21, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 21),
	   z0 = svld3_vnum (p0, x0, 21))
     171  
/* The _vnum form with a constant vector number of 24.  Unlike the
   ld3_vnum_f32_21 case, no immediate offset is expected here: the
   address is formed in a scratch register with ADDVL and the load goes
   through that register.
   NOTE(review): presumably 24 is just outside the immediate range of
   LD3W -- confirm against the architecture reference.  */
/*
** ld3_vnum_f32_24:
**	addvl	(x[0-9]+), x0, #24
**	ld3w	{z0\.s - z2\.s}, p0/z, \[\1\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_24, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, 24),
	   z0 = svld3_vnum (p0, x0, 24))
     181  
/* The _vnum form with a constant vector number of -1.  The expected
   code matches ld3_f32_m1: x0 is adjusted with DECB and the load is
   unindexed.
   Moving the constant into a register would also be OK.  */
/*
** ld3_vnum_f32_m1:
**	decb	x0
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_m1, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, -1),
	   z0 = svld3_vnum (p0, x0, -1))
     192  
/* The _vnum form with a constant vector number of -2.  The expected
   code matches ld3_f32_m2: x0 is adjusted with "DECB ..., mul #2" and
   the load is unindexed.
   Moving the constant into a register would also be OK.  */
/*
** ld3_vnum_f32_m2:
**	decb	x0, all, mul #2
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_m2, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, -2),
	   z0 = svld3_vnum (p0, x0, -2))
     203  
/* The _vnum form with a constant vector number of -3.  The expected
   code encodes the offset in the load as the immediate
   "#-3, mul vl".  */
/*
** ld3_vnum_f32_m3:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_m3, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, -3),
	   z0 = svld3_vnum (p0, x0, -3))
     212  
/* The _vnum form with a constant vector number of -24.  The offset is
   still expected to be encoded as the immediate "#-24, mul vl".
   NOTE(review): presumably -24 is the most negative immediate that
   LD3W accepts -- confirm against the architecture reference.  */
/*
** ld3_vnum_f32_m24:
**	ld3w	{z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_m24, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, -24),
	   z0 = svld3_vnum (p0, x0, -24))
     221  
/* The _vnum form with a constant vector number of -27.  Unlike the
   ld3_vnum_f32_m24 case, no immediate offset is expected here: the
   address is formed in a scratch register with ADDVL and the load goes
   through that register.
   NOTE(review): presumably -27 is just outside the immediate range of
   LD3W -- confirm against the architecture reference.  */
/*
** ld3_vnum_f32_m27:
**	addvl	(x[0-9]+), x0, #-27
**	ld3w	{z0\.s - z2\.s}, p0/z, \[\1\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_m27, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, -27),
	   z0 = svld3_vnum (p0, x0, -27))
     231  
/* The _vnum form with a run-time vector number held in x1.  The
   expected code reads a byte count with CNTB, uses MADD to add the
   product of that count and x1 to x0 (the two multiplicands may appear
   in either order), and then loads through the resulting register with
   an unindexed LD3W.
   Using MUL to calculate an index would also be OK.  */
/*
** ld3_vnum_f32_x1:
**	cntb	(x[0-9]+)
**	madd	(x[0-9]+), (x1, \1|\1, x1), x0
**	ld3w	{z0\.s - z2\.s}, p0/z, \[\2\]
**	ret
*/
TEST_LOAD (ld3_vnum_f32_x1, svfloat32x3_t, float32_t,
	   z0 = svld3_vnum_f32 (p0, x0, x1),
	   z0 = svld3_vnum (p0, x0, x1))