1  /* Test vdup_lane intrinsics work correctly.  */
       2  /* { dg-do run } */
       3  /* { dg-options "--save-temps -O1" } */
       4  
       5  #include <arm_neon.h>
       6  
       7  extern void abort (void);
       8  
       9  float32x2_t __attribute__ ((noinline))
      10  wrap_vdup_lane_f32_0 (float32x2_t a)
      11  {
      12    return vdup_lane_f32 (a, 0);
      13  }
      14  
      15  float32x2_t __attribute__ ((noinline))
      16  wrap_vdup_lane_f32_1 (float32x2_t a)
      17  {
      18    return vdup_lane_f32 (a, 1);
      19  }
      20  
      21  int __attribute__ ((noinline))
      22  test_vdup_lane_f32 ()
      23  {
      24    float32x2_t a;
      25    float32x2_t b;
      26    int i;
      27    float32_t c[2] = { 0.0 , 3.14 };
      28    float32_t d[2];
      29  
      30    a = vld1_f32 (c);
      31    b = wrap_vdup_lane_f32_0 (a);
      32    vst1_f32 (d, b);
      33    for (i = 0; i < 2; i++)
      34      if (c[0] != d[i])
      35        return 1;
      36  
      37    b = wrap_vdup_lane_f32_1 (a);
      38    vst1_f32 (d, b);
      39    for (i = 0; i < 2; i++)
      40      if (c[1] != d[i])
      41        return 1;
      42    return 0;
      43  }
      44  
      45  float32x4_t __attribute__ ((noinline))
      46  wrap_vdupq_lane_f32_0 (float32x2_t a)
      47  {
      48    return vdupq_lane_f32 (a, 0);
      49  }
      50  
      51  float32x4_t __attribute__ ((noinline))
      52  wrap_vdupq_lane_f32_1 (float32x2_t a)
      53  {
      54    return vdupq_lane_f32 (a, 1);
      55  }
      56  
      57  int __attribute__ ((noinline))
      58  test_vdupq_lane_f32 ()
      59  {
      60    float32x2_t a;
      61    float32x4_t b;
      62    int i;
      63    float32_t c[2] = { 0.0 , 3.14 };
      64    float32_t d[4];
      65  
      66    a = vld1_f32 (c);
      67    b = wrap_vdupq_lane_f32_0 (a);
      68    vst1q_f32 (d, b);
      69    for (i = 0; i < 4; i++)
      70      if (c[0] != d[i])
      71        return 1;
      72  
      73    b = wrap_vdupq_lane_f32_1 (a);
      74    vst1q_f32 (d, b);
      75    for (i = 0; i < 4; i++)
      76      if (c[1] != d[i])
      77        return 1;
      78    return 0;
      79  }
      80  
      81  int8x8_t __attribute__ ((noinline))
      82  wrap_vdup_lane_s8_0 (int8x8_t a)
      83  {
      84    return vdup_lane_s8 (a, 0);
      85  }
      86  
      87  int8x8_t __attribute__ ((noinline))
      88  wrap_vdup_lane_s8_1 (int8x8_t a)
      89  {
      90    return vdup_lane_s8 (a, 1);
      91  }
      92  
      93  int __attribute__ ((noinline))
      94  test_vdup_lane_s8 ()
      95  {
      96    int8x8_t a;
      97    int8x8_t b;
      98    int i;
      99    /* Only two first cases are interesting.  */
     100    int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     101    int8_t d[8];
     102  
     103    a = vld1_s8 (c);
     104    b = wrap_vdup_lane_s8_0 (a);
     105    vst1_s8 (d, b);
     106    for (i = 0; i < 8; i++)
     107      if (c[0] != d[i])
     108        return 1;
     109  
     110    b = wrap_vdup_lane_s8_1 (a);
     111    vst1_s8 (d, b);
     112    for (i = 0; i < 8; i++)
     113      if (c[1] != d[i])
     114        return 1;
     115    return 0;
     116  }
     117  
     118  int8x16_t __attribute__ ((noinline))
     119  wrap_vdupq_lane_s8_0 (int8x8_t a)
     120  {
     121    return vdupq_lane_s8 (a, 0);
     122  }
     123  
     124  int8x16_t __attribute__ ((noinline))
     125  wrap_vdupq_lane_s8_1 (int8x8_t a)
     126  {
     127    return vdupq_lane_s8 (a, 1);
     128  }
     129  
     130  int __attribute__ ((noinline))
     131  test_vdupq_lane_s8 ()
     132  {
     133    int8x8_t a;
     134    int8x16_t b;
     135    int i;
     136    /* Only two first cases are interesting.  */
     137    int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
     138    int8_t d[16];
     139  
     140    a = vld1_s8 (c);
     141    b = wrap_vdupq_lane_s8_0 (a);
     142    vst1q_s8 (d, b);
     143    for (i = 0; i < 16; i++)
     144      if (c[0] != d[i])
     145        return 1;
     146  
     147    b = wrap_vdupq_lane_s8_1 (a);
     148    vst1q_s8 (d, b);
     149    for (i = 0; i < 16; i++)
     150      if (c[1] != d[i])
     151        return 1;
     152    return 0;
     153  }
     154  
     155  int16x4_t __attribute__ ((noinline))
     156  wrap_vdup_lane_s16_0 (int16x4_t a)
     157  {
     158    return vdup_lane_s16 (a, 0);
     159  }
     160  
     161  int16x4_t __attribute__ ((noinline))
     162  wrap_vdup_lane_s16_1 (int16x4_t a)
     163  {
     164    return vdup_lane_s16 (a, 1);
     165  }
     166  
     167  int __attribute__ ((noinline))
     168  test_vdup_lane_s16 ()
     169  {
     170    int16x4_t a;
     171    int16x4_t b;
     172    int i;
     173    /* Only two first cases are interesting.  */
     174    int16_t c[4] = { 0, 1, 2, 3 };
     175    int16_t d[4];
     176  
     177    a = vld1_s16 (c);
     178    b = wrap_vdup_lane_s16_0 (a);
     179    vst1_s16 (d, b);
     180    for (i = 0; i < 4; i++)
     181      if (c[0] != d[i])
     182        return 1;
     183  
     184    b = wrap_vdup_lane_s16_1 (a);
     185    vst1_s16 (d, b);
     186    for (i = 0; i < 4; i++)
     187      if (c[1] != d[i])
     188        return 1;
     189    return 0;
     190  }
     191  
     192  int16x8_t __attribute__ ((noinline))
     193  wrap_vdupq_lane_s16_0 (int16x4_t a)
     194  {
     195    return vdupq_lane_s16 (a, 0);
     196  }
     197  
     198  int16x8_t __attribute__ ((noinline))
     199  wrap_vdupq_lane_s16_1 (int16x4_t a)
     200  {
     201    return vdupq_lane_s16 (a, 1);
     202  }
     203  
     204  int __attribute__ ((noinline))
     205  test_vdupq_lane_s16 ()
     206  {
     207    int16x4_t a;
     208    int16x8_t b;
     209    int i;
     210    /* Only two first cases are interesting.  */
     211    int16_t c[4] = { 0, 1, 2, 3 };
     212    int16_t d[8];
     213  
     214    a = vld1_s16 (c);
     215    b = wrap_vdupq_lane_s16_0 (a);
     216    vst1q_s16 (d, b);
     217    for (i = 0; i < 8; i++)
     218      if (c[0] != d[i])
     219        return 1;
     220  
     221    b = wrap_vdupq_lane_s16_1 (a);
     222    vst1q_s16 (d, b);
     223    for (i = 0; i < 8; i++)
     224      if (c[1] != d[i])
     225        return 1;
     226    return 0;
     227  }
     228  
     229  int32x2_t __attribute__ ((noinline))
     230  wrap_vdup_lane_s32_0 (int32x2_t a)
     231  {
     232    return vdup_lane_s32 (a, 0);
     233  }
     234  
     235  int32x2_t __attribute__ ((noinline))
     236  wrap_vdup_lane_s32_1 (int32x2_t a)
     237  {
     238    return vdup_lane_s32 (a, 1);
     239  }
     240  
     241  int __attribute__ ((noinline))
     242  test_vdup_lane_s32 ()
     243  {
     244    int32x2_t a;
     245    int32x2_t b;
     246    int i;
     247    int32_t c[2] = { 0, 1 };
     248    int32_t d[2];
     249  
     250    a = vld1_s32 (c);
     251    b = wrap_vdup_lane_s32_0 (a);
     252    vst1_s32 (d, b);
     253    for (i = 0; i < 2; i++)
     254      if (c[0] != d[i])
     255        return 1;
     256  
     257    b = wrap_vdup_lane_s32_1 (a);
     258    vst1_s32 (d, b);
     259    for (i = 0; i < 2; i++)
     260      if (c[1] != d[i])
     261        return 1;
     262    return 0;
     263  }
     264  
     265  int32x4_t __attribute__ ((noinline))
     266  wrap_vdupq_lane_s32_0 (int32x2_t a)
     267  {
     268    return vdupq_lane_s32 (a, 0);
     269  }
     270  
     271  int32x4_t __attribute__ ((noinline))
     272  wrap_vdupq_lane_s32_1 (int32x2_t a)
     273  {
     274    return vdupq_lane_s32 (a, 1);
     275  }
     276  
     277  int __attribute__ ((noinline))
     278  test_vdupq_lane_s32 ()
     279  {
     280    int32x2_t a;
     281    int32x4_t b;
     282    int i;
     283    int32_t c[2] = { 0, 1 };
     284    int32_t d[4];
     285  
     286    a = vld1_s32 (c);
     287    b = wrap_vdupq_lane_s32_0 (a);
     288    vst1q_s32 (d, b);
     289    for (i = 0; i < 4; i++)
     290      if (c[0] != d[i])
     291        return 1;
     292  
     293    b = wrap_vdupq_lane_s32_1 (a);
     294    vst1q_s32 (d, b);
     295    for (i = 0; i < 4; i++)
     296      if (c[1] != d[i])
     297        return 1;
     298    return 0;
     299  }
     300  
     301  int64x1_t __attribute__ ((noinline))
     302  wrap_vdup_lane_s64_0 (int64x1_t a)
     303  {
     304    return vdup_lane_s64 (a, 0);
     305  }
     306  
     307  int __attribute__ ((noinline))
     308  test_vdup_lane_s64 ()
     309  {
     310    int64x1_t a;
     311    int64x1_t b;
     312    int64_t c[1];
     313    int64_t d[1];
     314  
     315    c[0] = 0;
     316    a = vld1_s64 (c);
     317    b = wrap_vdup_lane_s64_0 (a);
     318    vst1_s64 (d, b);
     319    if (c[0] != d[0])
     320      return 1;
     321  
     322    return 0;
     323  }
     324  
     325  int64x2_t __attribute__ ((noinline))
     326  wrap_vdupq_lane_s64_0 (int64x1_t a)
     327  {
     328    return vdupq_lane_s64 (a, 0);
     329  }
     330  
     331  int __attribute__ ((noinline))
     332  test_vdupq_lane_s64 ()
     333  {
     334    int64x1_t a;
     335    int64x2_t b;
     336    int i;
     337    int64_t c[1];
     338    int64_t d[2];
     339  
     340    c[0] = 0;
     341    a = vld1_s64 (c);
     342    b = wrap_vdupq_lane_s64_0 (a);
     343    vst1q_s64 (d, b);
     344    for (i = 0; i < 2; i++)
     345      if (c[0] != d[i])
     346        return 1;
     347    return 0;
     348  }
     349  
     350  int
     351  main ()
     352  {
     353  
     354    if (test_vdup_lane_f32 ())
     355      abort ();
     356    if (test_vdup_lane_s8 ())
     357      abort ();
     358    if (test_vdup_lane_s16 ())
     359      abort ();
     360    if (test_vdup_lane_s32 ())
     361      abort ();
     362    if (test_vdup_lane_s64 ())
     363      abort ();
     364    if (test_vdupq_lane_f32 ())
     365      abort ();
     366    if (test_vdupq_lane_s8 ())
     367      abort ();
     368    if (test_vdupq_lane_s16 ())
     369      abort ();
     370    if (test_vdupq_lane_s32 ())
     371      abort ();
     372    if (test_vdupq_lane_s64 ())
     373      abort ();
     374  
     375    return 0;
     376  }
     377  
     378  /* Asm check for test_vdup_lane_s8.  */
     379  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
     380  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
     381  
     382  /* Asm check for test_vdupq_lane_s8.  */
     383  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
     384  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
     385  
     386  /* Asm check for test_vdup_lane_s16.  */
     387  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* Asm check for test_vdup_lane_s16, lane 1.  */
     389  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
     390  
     391  /* Asm check for test_vdupq_lane_s16.  */
     392  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* Asm check for test_vdupq_lane_s16, lane 1.  */
     394  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
     395  
     396  /* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32.  */
     397  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
     398  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
     399  
     400  /* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
     401  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
     402  /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
     403