1  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
       2  /* { dg-add-options arm_v8_1m_mve } */
       3  /* { dg-additional-options "-O1" } */
       4  /* { dg-final { check-function-bodies "**" "" } } */
       5  
       6  #include "arm_mve.h"
       7  
       8  #ifdef __cplusplus
       9  extern "C" {
      10  #endif
      11  
      12  /*
      13  **test:
      14  **	...
      15  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      16  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      17  **	...
      18  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      19  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      20  **	...
      21  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      22  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      23  **	...
      24  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      25  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      26  **	...
      27  */
      /* Code-generation check for vld2q/vst2q with a run-time offset.
         Loads one uint8x16x2_t from IN and one from IN + WIDTH, then stores
         them to OUT and OUT + WIDTH.  The expected body that precedes this
         function lists every vld2x.8/vst2x.8 with a plain [reg] operand,
         i.e. without the trailing '!' writeback marker.  */
      28  void
      29  test(const uint8_t * in, uint8_t * out, int width)
      30  {
      31    uint8x16x2_t rg = vld2q(in);
      32    uint8x16x2_t gb = vld2q(in + width);  /* offset is not a constant */
      33    vst2q (out, rg);
      34    vst2q (out + width, gb);
      35  }
      36  
      37  /*
      38  **test2:
      39  **	...
      40  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      41  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
      42  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      43  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      44  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      45  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
      46  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      47  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      48  **	...
      49  */
      /* Code-generation check for vld2q/vst2q with a constant offset of +32.
         Loads from IN and IN + 32, then stores to OUT and OUT + 32.  The
         expected body that precedes this function requires the '!'
         writeback marker on the first vld21.8 and the first vst21.8, and
         allows no other instructions between the eight listed ones.
         NOTE(review): 32 is presumably the byte size of one uint8x16x2_t
         (2 x 16 bytes), which would be why writeback applies here --
         confirm against the MVE VLD2/VST2 documentation.  */
      50  void
      51  test2(const uint8_t * in, uint8_t * out)
      52  {
      53    uint8x16x2_t rg = vld2q(in);
      54    uint8x16x2_t gb = vld2q(in + 32);
      55    vst2q (out, rg);
      56    vst2q (out + 32, gb);
      57  }
      58  
      59  /*
      60  **test3:
      61  **	...
      62  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      63  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      64  **	...
      65  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      66  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      67  **	...
      68  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      69  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      70  **	...
      71  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      72  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      73  **	...
      74  */
      /* Code-generation check for vld2q/vst2q with a constant offset of -32.
         Loads from IN and IN - 32, then stores to OUT and OUT - 32.  The
         expected body that precedes this function lists every
         vld2x.8/vst2x.8 with a plain [reg] operand (no '!'), so a negative
         offset is not expected to use the writeback form.  */
      75  void
      76  test3(const uint8_t * in, uint8_t * out)
      77  {
      78    uint8x16x2_t rg = vld2q(in);
      79    uint8x16x2_t gb = vld2q(in - 32);  /* negative offset */
      80    vst2q (out, rg);
      81    vst2q (out - 32, gb);
      82  }
      83  
      84  /*
      85  **test4:
      86  **	...
      87  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      88  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      89  **	...
      90  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      91  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      92  **	...
      93  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      94  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      95  **	...
      96  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      97  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
      98  **	...
      99  */
     /* Code-generation check for vld2q/vst2q with a constant offset of +64.
        Loads from IN and IN + 64, then stores to OUT and OUT + 64.  The
        expected body that precedes this function lists every
        vld2x.8/vst2x.8 with a plain [reg] operand (no '!'), unlike the
        +32 case exercised by test2.  */
     100  void
     101  test4(const uint8_t * in, uint8_t * out)
     102  {
     103    uint8x16x2_t rg = vld2q(in);
     104    uint8x16x2_t gb = vld2q(in + 64);
     105    vst2q (out, rg);
     106    vst2q (out + 64, gb);
     107  }
     108  
     109  /*
     110  **test5:
     111  **	...
     112  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     113  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     114  **	...
     115  **	vld20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     116  **	vld21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     117  **	...
     118  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     119  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     120  **	...
     121  **	vst20.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     122  **	vst21.8	{q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     123  **	...
     124  */
     /* Code-generation check for vld2q/vst2q with a constant offset of +42.
        Loads from IN and IN + 42, then stores to OUT and OUT + 42.  The
        expected body that precedes this function lists every
        vld2x.8/vst2x.8 with a plain [reg] operand (no '!').  */
     125  void
     126  test5(const uint8_t * in, uint8_t * out)
     127  {
     128    uint8x16x2_t rg = vld2q(in);
     129    uint8x16x2_t gb = vld2q(in + 42);  /* arbitrary constant offset */
     130    vst2q (out, rg);
     131    vst2q (out + 42, gb);
     132  }
     133  
     134  /*
     135  **test6:
     136  **	...
     137  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     138  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     139  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     140  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     141  **	...
     142  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     143  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     144  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     145  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     146  **	...
     147  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     148  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     149  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     150  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     151  **	...
     152  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     153  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     154  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     155  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     156  **	...
     157  */
     /* Code-generation check for vld4q/vst4q with a run-time offset.
        Loads one uint8x16x4_t from IN and one from IN + WIDTH, then stores
        them to OUT and OUT + WIDTH.  The expected body that precedes this
        function lists every vld4x.8/vst4x.8 with a plain [reg] operand,
        i.e. without the trailing '!' writeback marker.  */
     158  void
     159  test6(const uint8_t * in, uint8_t * out, int width)
     160  {
     161    uint8x16x4_t rg = vld4q(in);
     162    uint8x16x4_t gb = vld4q(in + width);  /* offset is not a constant */
     163    vst4q (out, rg);
     164    vst4q (out + width, gb);
     165  }
     166  
     167  /*
     168  **test7:
     169  **	...
     170  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     171  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     172  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     173  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     174  **	...
     175  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     176  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     177  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     178  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     179  **	...
     180  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     181  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     182  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     183  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     184  **	...
     185  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     186  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     187  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     188  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     189  **	...
     190  */
     /* Code-generation check for vld4q/vst4q with a constant offset of +32.
        Loads from IN and IN + 32, then stores to OUT and OUT + 32.  The
        expected body that precedes this function lists every
        vld4x.8/vst4x.8 with a plain [reg] operand (no '!'), unlike the
        vld2q/vst2q pattern of test2, which uses the same offset.  */
     191  void
     192  test7(const uint8_t * in, uint8_t * out)
     193  {
     194    uint8x16x4_t rg = vld4q(in);
     195    uint8x16x4_t gb = vld4q(in + 32);
     196    vst4q (out, rg);
     197    vst4q (out + 32, gb);
     198  }
     199  
     200  /*
     201  **test8:
     202  **	...
     203  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     204  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     205  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     206  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
     207  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     208  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     209  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     210  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     211  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     212  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     213  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     214  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
     215  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     216  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     217  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     218  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     219  **	...
     220  */
     /* Code-generation check for vld4q/vst4q with a constant offset of +64.
        Loads from IN and IN + 64, then stores to OUT and OUT + 64.  The
        expected body that precedes this function requires the '!'
        writeback marker on the first vld43.8 and the first vst43.8, and
        allows no other instructions between the sixteen listed ones.
        NOTE(review): 64 is presumably the byte size of one uint8x16x4_t
        (4 x 16 bytes), which would be why writeback applies here --
        confirm against the MVE VLD4/VST4 documentation.  */
     221  void
     222  test8(const uint8_t * in, uint8_t * out)
     223  {
     224    uint8x16x4_t rg = vld4q(in);
     225    uint8x16x4_t gb = vld4q(in + 64);
     226    vst4q (out, rg);
     227    vst4q (out + 64, gb);
     228  }
     229  
     230  /*
     231  **test9:
     232  **	...
     233  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     234  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     235  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     236  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     237  **	...
     238  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     239  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     240  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     241  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     242  **	...
     243  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     244  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     245  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     246  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     247  **	...
     248  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     249  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     250  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     251  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     252  **	...
     253  */
     /* Code-generation check for vld4q/vst4q with a constant offset of -64.
        Loads from IN and IN - 64, then stores to OUT and OUT - 64.  The
        expected body that precedes this function lists every
        vld4x.8/vst4x.8 with a plain [reg] operand (no '!'), so a negative
        offset is not expected to use the writeback form.  */
     254  void
     255  test9(const uint8_t * in, uint8_t * out)
     256  {
     257    uint8x16x4_t rg = vld4q(in);
     258    uint8x16x4_t gb = vld4q(in - 64);  /* negative offset */
     259    vst4q (out, rg);
     260    vst4q (out - 64, gb);
     261  }
     262  
     263  /*
     264  **test10:
     265  **	...
     266  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     267  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     268  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     269  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     270  **	...
     271  **	vld40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     272  **	vld41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     273  **	vld42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     274  **	vld43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     275  **	...
     276  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     277  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     278  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     279  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     280  **	...
     281  **	vst40.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     282  **	vst41.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     283  **	vst42.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     284  **	vst43.8	{q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
     285  **	...
     286  */
     /* Code-generation check for vld4q/vst4q with a constant offset of +42.
        Loads from IN and IN + 42, then stores to OUT and OUT + 42.  The
        expected body that precedes this function lists every
        vld4x.8/vst4x.8 with a plain [reg] operand (no '!').  */
     287  void
     288  test10(const uint8_t * in, uint8_t * out)
     289  {
     290    uint8x16x4_t rg = vld4q(in);
     291    uint8x16x4_t gb = vld4q(in + 42);  /* arbitrary constant offset */
     292    vst4q (out, rg);
     293    vst4q (out + 42, gb);
     294  }
     295  
     296  #ifdef __cplusplus
     297  }
     298  #endif
     299  
     300  /* { dg-final { scan-assembler-not "__ARM_undef" } } */