/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -msve-vector-bits=2048 -mlittle-endian --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */

typedef unsigned char v128qi __attribute__((vector_size(128)));
typedef unsigned char v64qi __attribute__((vector_size(64)));
typedef unsigned char v32qi __attribute__((vector_size(32)));
typedef unsigned short v64hi __attribute__((vector_size(128)));
typedef unsigned short v32hi __attribute__((vector_size(64)));
typedef _Float16 v64hf __attribute__((vector_size(128)));
typedef _Float16 v32hf __attribute__((vector_size(64)));
typedef __bf16 v64bf __attribute__((vector_size(128)));
typedef __bf16 v32bf __attribute__((vector_size(64)));
typedef unsigned int v32si __attribute__((vector_size(128)));
typedef float v32sf __attribute__((vector_size(128)));

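/* PERMn (B) expands to the 2^(n+1) consecutive indices B, B + 1, ...,
   B + 2^(n+1) - 1, giving the selector initializers for the shuffles
   below.  */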
#define PERM0(B) B, B + 1
#define PERM1(B) PERM0 (B), PERM0 (B + 2)
#define PERM2(B) PERM1 (B), PERM1 (B + 4)
#define PERM3(B) PERM2 (B), PERM2 (B + 8)
#define PERM4(B) PERM3 (B), PERM3 (B + 16)
#define PERM5(B) PERM4 (B), PERM4 (B + 32)
#define PERM6(B) PERM5 (B), PERM5 (B + 64)

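/* Each function below shuffles its input with a selector of consecutive
   element indices starting at a nonzero offset.  The expected bodies check
   that the shuffle is implemented as a predicated load, a single EXT by the
   matching byte offset, and a predicated store.  */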
/*
** qi_ext_h_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #2
**	st1b	\2\.h, \1, \[x8\]
**	ret
*/
v128qi
qi_ext_h_1 (v128qi x)
{
  return __builtin_shuffle (x, x, (v128qi) { PERM6 (1) });
}

/*
** qi_ext_h_1_two_op:
**	ptrue	(p[0-7])\.b, vl256
** (
**	ld1b	(z[0-9]+)\.h, \1/z, \[x1\]
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	ext	\3\.b, \3\.b, \2\.b, #2
**	st1b	\3\.h, \1, \[x8\]
** |
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	ld1b	(z[0-9]+)\.h, \1/z, \[x1\]
**	ext	\4\.b, \4\.b, \5\.b, #2
**	st1b	\4\.h, \1, \[x8\]
** )
**	ret
*/
v128qi
qi_ext_h_1_two_op (v128qi x, v128qi y)
{
  return __builtin_shuffle (x, y, (v128qi) { PERM6 (1) });
}

/*
** qi_ext_h_127:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.h, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #254
**	st1b	\2\.h, \1, \[x8\]
**	ret
*/
v128qi
qi_ext_h_127 (v128qi x)
{
  return __builtin_shuffle (x, x, (v128qi) { PERM6 (127) });
}

/*
** qi_ext_s_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #4
**	st1b	\2\.s, \1, \[x8\]
**	ret
*/
v64qi
qi_ext_s_1 (v64qi x)
{
  return __builtin_shuffle (x, x, (v64qi) { PERM5 (1) });
}

/*
** qi_ext_s_63:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #252
**	st1b	\2\.s, \1, \[x8\]
**	ret
*/
v64qi
qi_ext_s_63 (v64qi x)
{
  return __builtin_shuffle (x, x, (v64qi) { PERM5 (63) });
}

/*
** qi_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1b	\2\.d, \1, \[x8\]
**	ret
*/
v32qi
qi_ext_d_1 (v32qi x)
{
  return __builtin_shuffle (x, x, (v32qi) { PERM4 (1) });
}

/*
** qi_ext_d_31:
**	ptrue	(p[0-7])\.b, vl256
**	ld1b	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #248
**	st1b	\2\.d, \1, \[x8\]
**	ret
*/
v32qi
qi_ext_d_31 (v32qi x)
{
  return __builtin_shuffle (x, x, (v32qi) { PERM4 (31) });
}

/*
** hi_ext_s_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #4
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64hi
hi_ext_s_1 (v64hi x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (1) });
}

/*
** hi_ext_s_63:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #252
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64hi
hi_ext_s_63 (v64hi x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (63) });
}

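/* The _Float16, __bf16 and float variants below reuse the integer vector
   types for their selectors, since the __builtin_shuffle mask must be an
   integer vector.  */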
/*
** hf_ext_s_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #4
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64hf
hf_ext_s_1 (v64hf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (1) });
}

/*
** hf_ext_s_60:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #240
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64hf
hf_ext_s_60 (v64hf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (60) });
}

/*
** bf_ext_s_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #4
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64bf
bf_ext_s_1 (v64bf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (1) });
}

/*
** bf_ext_s_40:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.s, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #160
**	st1h	\2\.s, \1, \[x8\]
**	ret
*/
v64bf
bf_ext_s_40 (v64bf x)
{
  return __builtin_shuffle (x, x, (v64hi) { PERM5 (40) });
}

/*
** hi_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32hi
hi_ext_d_1 (v32hi x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (1) });
}

/*
** hi_ext_d_31:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #248
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32hi
hi_ext_d_31 (v32hi x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (31) });
}

/*
** hf_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32hf
hf_ext_d_1 (v32hf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (1) });
}

/*
** hf_ext_d_18:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #144
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32hf
hf_ext_d_18 (v32hf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (18) });
}

/*
** bf_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32bf
bf_ext_d_1 (v32bf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (1) });
}

/*
** bf_ext_d_7:
**	ptrue	(p[0-7])\.b, vl256
**	ld1h	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #56
**	st1h	\2\.d, \1, \[x8\]
**	ret
*/
v32bf
bf_ext_d_7 (v32bf x)
{
  return __builtin_shuffle (x, x, (v32hi) { PERM4 (7) });
}

/*
** si_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1w	\2\.d, \1, \[x8\]
**	ret
*/
v32si
si_ext_d_1 (v32si x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (1) });
}

/*
** si_ext_d_31:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #248
**	st1w	\2\.d, \1, \[x8\]
**	ret
*/
v32si
si_ext_d_31 (v32si x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}

/*
** sf_ext_d_1:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #8
**	st1w	\2\.d, \1, \[x8\]
**	ret
*/
v32sf
sf_ext_d_1 (v32sf x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (1) });
}

/*
** sf_ext_d_31:
**	ptrue	(p[0-7])\.b, vl256
**	ld1w	(z[0-9]+)\.d, \1/z, \[x0\]
**	ext	\2\.b, \2\.b, \2\.b, #248
**	st1w	\2\.d, \1, \[x8\]
**	ret
*/
v32sf
sf_ext_d_31 (v32sf x)
{
  return __builtin_shuffle (x, x, (v32si) { PERM4 (31) });
}