1  /* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
       2  /* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
       3  
       4  #include <arm_neon.h>
       5  
       6  /*
       7  ** s32x2_1:
       8  **	dup	v0\.2s, w0
       9  **	ret
      10  */
      11  int32x2_t
      12  s32x2_1 (int32_t x)
      13  {
            /* Both array elements are X, and the array is only read back through
               vld1_s32.  The expected body for this function is a single DUP from
               w0 followed by RET, i.e. no store/load of the temporary array.  */
      14    int32_t arr[] = { x, x };
      15    return vld1_s32 (arr);
      16  }
      17  
      18  /*
      19  ** s32x2_2:
      20  **	fmov	s0, w0
      21  **	ret
      22  */
      23  int32x2_t
      24  s32x2_2 (int32_t x)
      25  {
            /* Element 0 is X and element 1 is the constant zero.  The expected
               body is one FMOV from w0 into s0 and nothing else, so no separate
               instruction is allowed for the zero lane.  */
      26    int32_t arr[] = { x, 0 };
      27    return vld1_s32 (arr);
      28  }
      29  
      30  /*
      31  ** s32x2_3:
      32  **	fmov	s0, w0
      33  **	ins	v0\.s\[1\], w1
      34  **	ret
      35  */
      36  int32x2_t
      37  s32x2_3 (int32_t x, int32_t y)
      38  {
            /* Two distinct scalars: the expected body moves X into s0 with FMOV
               and then inserts Y into lane 1 with INS, without going through
               memory.  */
      39    int32_t arr[] = { x, y };
      40    return vld1_s32 (arr);
      41  }
      42  
      43  /*
      44  ** f32x2_1:
      45  **	dup	v0\.2s, v0\.s\[0\]
      46  **	ret
      47  */
      48  float32x2_t
      49  f32x2_1 (float32_t x)
      50  {
            /* Floating-point counterpart of s32x2_1: both elements are X.  The
               expected body is a single lane-to-vector DUP from lane 0 of v0.  */
      51    float32_t arr[] = { x, x };
      52    return vld1_f32 (arr);
      53  }
      54  
      55  /*
      56  ** f32x2_2:
      57  **	ins	v0\.s\[1\], v1\.s\[0\]
      58  **	ret
      59  */
      60  float32x2_t
      61  f32x2_2 (float32_t x, float32_t y)
      62  {
            /* Two distinct float scalars.  The expected body is only an INS that
               copies lane 0 of v1 into lane 1 of v0; no instruction is expected
               for placing X in lane 0.  */
      63    float32_t arr[] = { x, y };
      64    return vld1_f32 (arr);
      65  }
      66  
      67  /*
      68  ** s16x4_1:
      69  **	dup	v0\.4h, w0
      70  **	ret
      71  */
      72  int16x4_t
      73  s16x4_1 (int16_t x)
      74  {
            /* All four 16-bit elements are X; the expected body is a single DUP
               into v0.4h from w0.  */
      75    int16_t arr[] = { x, x, x, x };
      76    return vld1_s16 (arr);
      77  }
      78  
      79  /*
      80  ** s16x4_2:
      81  **	...
      82  **	fmov	[dsh]0, [wx][0-9]+
      83  **	ret
      84  */
      85  int16x4_t
      86  s16x4_2 (int16_t x)
      87  {
            /* Element 0 is X, the remaining three are zero.  The expected body is
               intentionally loose: it starts with a "..." wildcard and then
               accepts an FMOV into d0, s0 or h0 from any w or x register.
               NOTE(review): presumably the wildcard allows for an instruction
               that first isolates the low 16 bits of the argument -- confirm.  */
      88    int16_t arr[] = { x, 0, 0, 0 };
      89    return vld1_s16 (arr);
      90  }
      91  
      92  /*
      93  ** s16x4_3:
      94  **	dup	v0\.4h, w1
      95  **	ins	v0\.h\[0\], w0
      96  **	ret
      97  */
      98  int16x4_t
      99  s16x4_3 (int16_t x, int16_t y)
     100  {
            /* One X followed by three copies of Y.  The expected body broadcasts
               Y (w1) to all four lanes with DUP and then overwrites lane 0 with
               X (w0) using INS.  */
     101    int16_t arr[] = { x, y, y, y };
     102    return vld1_s16 (arr);
     103  }
     104  
     105  /*
     106  ** f16x4_1:
     107  **	dup	v0\.4h, v0\.h\[0\]
     108  **	ret
     109  */
     110  float16x4_t
     111  f16x4_1 (float16_t x)
     112  {
            /* Half-precision splat: all four elements are X.  The expected body
               is a single DUP into v0.4h from lane 0 of v0.  */
     113    float16_t arr[] = { x, x, x, x };
     114    return vld1_f16 (arr);
     115  }
     116  
     117  /*
     118  ** s64x2_1:
     119  **	dup	v0\.2d, x0
     120  **	ret
     121  */
     122  int64x2_t
     123  s64x2_1 (int64_t x)
     124  {
            /* First of the 128-bit (vld1q) cases: both 64-bit elements are X.
               The expected body is a single DUP into v0.2d from x0.  */
     125    int64_t arr[] = { x, x };
     126    return vld1q_s64 (arr);
     127  }
     128  
     129  /*
     130  ** s64x2_2: { xfail *-*-* }
     131  **	fmov	d0, x0
     132  **	ret
     133  */
     134  int64x2_t
     135  s64x2_2 (int64_t x)
     136  {
            /* Element 0 is X and element 1 is zero.  The desired body is a lone
               FMOV from x0 into d0, but that expectation is marked xfail for all
               targets, so the compiler is not currently expected to produce
               it.  */
     137    int64_t arr[] = { x, 0 };
     138    return vld1q_s64 (arr);
     139  }
     140  
     141  /*
     142  ** s64x2_3:
     143  **	fmov	d0, x0
     144  **	ins	v0\.d\[1\], x1
     145  **	ret
     146  */
     147  int64x2_t
     148  s64x2_3 (int64_t x, int64_t y)
     149  {
            /* Two distinct 64-bit scalars: the expected body moves X into d0 with
               FMOV and then inserts Y (x1) into lane 1 with INS.  */
     150    int64_t arr[] = { x, y };
     151    return vld1q_s64 (arr);
     152  }
     153  
     154  /*
     155  ** f64x2_1:
     156  **	dup	v0\.2d, v0\.d\[0\]
     157  **	ret
     158  */
     159  float64x2_t
     160  f64x2_1 (float64_t x)
     161  {
            /* Double-precision splat: both elements are X.  The expected body is
               a single DUP into v0.2d from lane 0 of v0.  */
     162    float64_t arr[] = { x, x };
     163    return vld1q_f64 (arr);
     164  }
     165  
     166  /*
     167  ** f64x2_2:
     168  **	ins	v0\.d\[1\], v1\.d\[0\]
     169  **	ret
     170  */
     171  float64x2_t
     172  f64x2_2 (float64_t x, float64_t y)
     173  {
            /* Two distinct doubles.  The expected body is only an INS that copies
               lane 0 of v1 into lane 1 of v0; no instruction is expected for
               placing X in lane 0.  */
     174    float64_t arr[] = { x, y };
     175    return vld1q_f64 (arr);
     176  }
     177  
     178  /*
     179  ** s32x4_1:
     180  **	dup	v0\.4s, w0
     181  **	ret
     182  */
     183  int32x4_t
     184  s32x4_1 (int32_t x)
     185  {
            /* All four 32-bit elements are X; the expected body is a single DUP
               into v0.4s from w0.  */
     186    int32_t arr[] = { x, x, x, x };
     187    return vld1q_s32 (arr);
     188  }
     189  
     190  /*
     191  ** s32x4_2: { xfail *-*-* }
     192  **	fmov	s0, w0
     193  **	ret
     194  */
     195  int32x4_t
     196  s32x4_2 (int32_t x)
     197  {
            /* Element 0 is X, the remaining three are zero.  The desired body is
               a lone FMOV from w0 into s0, but that expectation is marked xfail
               for all targets, so the compiler is not currently expected to
               produce it.  */
     198    int32_t arr[] = { x, 0, 0, 0 };
     199    return vld1q_s32 (arr);
     200  }
     201  
     202  /*
     203  ** s32x4_3:
     204  **	dup	v0\.4s, w1
     205  **	ins	v0\.s\[0\], w0
     206  **	ret
     207  */
     208  int32x4_t
     209  s32x4_3 (int32_t x, int32_t y)
     210  {
            /* One X followed by three copies of Y.  The expected body broadcasts
               Y (w1) to all four lanes with DUP and then overwrites lane 0 with
               X (w0) using INS.  */
     211    int32_t arr[] = { x, y, y, y };
     212    return vld1q_s32 (arr);
     213  }
     214  
     215  /*
     216  ** f32x4_1:
     217  **	dup	v0\.4s, v0\.s\[0\]
     218  **	ret
     219  */
     220  float32x4_t
     221  f32x4_1 (float32_t x)
     222  {
            /* Single-precision splat into a 128-bit vector: all four elements
               are X.  The expected body is a single DUP into v0.4s from lane 0
               of v0.  */
     223    float32_t arr[] = { x, x, x, x };
     224    return vld1q_f32 (arr);
     225  }
     226  
     227  void consume (float32x4_t, float32x4_t, float32x4_t, float32x4_t);
          /* Prototype only (no definition is visible in this file): produce_1 and
             produce_2 pass their four splatted vectors to it.  */
     228  
     229  /*
     230  ** produce_1:
     231  ** (
     232  **	dup	v0\.4s, v0\.s\[0\]
     233  **	dup	v1\.4s, v1\.s\[0\]
     234  **	dup	v2\.4s, v2\.s\[0\]
     235  **	dup	v3\.4s, v3\.s\[0\]
     236  ** |
     237  **	dup	v3\.4s, v3\.s\[0\]
     238  **	dup	v2\.4s, v2\.s\[0\]
     239  **	dup	v1\.4s, v1\.s\[0\]
     240  **	dup	v0\.4s, v0\.s\[0\]
     241  ** )
     242  **	b	consume
     243  */
     244  void
     245  produce_1 (float32_t a, float32_t b, float32_t c, float32_t d)
     246  {
            /* Builds four splats as the rows of one 4x4 local array and loads each
               row with vld1q_f32.  The expected body is four lane-to-vector DUPs
               on v0-v3 (accepted in ascending or descending register order)
               followed by a direct branch to consume, with no use of the
               array in memory.  */
     247    float arr[4][4] = {
     248      { a, a, a, a },
     249      { b, b, b, b },
     250      { c, c, c, c },
     251      { d, d, d, d }
     252    };
     253    consume (vld1q_f32 (arr[0]), vld1q_f32 (arr[1]),
     254  	   vld1q_f32 (arr[2]), vld1q_f32 (arr[3]));
     255  }
     256  
     257  /*
     258  ** produce_2:
     259  ** (
     260  **	dup	v0\.4s, v0\.s\[0\]
     261  **	dup	v1\.4s, v1\.s\[0\]
     262  **	dup	v2\.4s, v2\.s\[0\]
     263  **	dup	v3\.4s, v3\.s\[0\]
     264  ** |
     265  **	dup	v3\.4s, v3\.s\[0\]
     266  **	dup	v2\.4s, v2\.s\[0\]
     267  **	dup	v1\.4s, v1\.s\[0\]
     268  **	dup	v0\.4s, v0\.s\[0\]
     269  ** )
     270  **	b	consume
     271  */
     272  void
     273  produce_2 (float32_t a, float32_t b, float32_t c, float32_t d)
     274  {
            /* Same computation as produce_1, but each splat lives in its own
               one-dimensional local array instead of a row of a 4x4 array.  The
               expected body is identical: four DUPs on v0-v3 in either register
               order, then a direct branch to consume.  */
     275    float arr0[] = { a, a, a, a };
     276    float arr1[] = { b, b, b, b };
     277    float arr2[] = { c, c, c, c };
     278    float arr3[] = { d, d, d, d };
     279    consume (vld1q_f32 (arr0), vld1q_f32 (arr1),
     280  	   vld1q_f32 (arr2), vld1q_f32 (arr3));
     281  }