/* Test the vmul_n_* and vmulq_n_* AArch64 SIMD intrinsics (f32, s32, u32,
   s16 and u16 for both forms; f64 for vmulq_n only).  */
       2  
       3  /* { dg-do run } */
       4  /* { dg-options "-O2 --save-temps" } */
       5  
       6  #include "arm_neon.h"
       7  
       8  extern void abort (void);
       9  
      10  #define A (132.4f)
      11  #define B (-0.0f)
      12  #define C (-34.8f)
      13  #define D (289.34f)
      14  float32_t expected2_1[2] = {A * A, B * A};
      15  float32_t expected2_2[2] = {A * B, B * B};
      16  float32_t expected4_1[4] = {A * A, B * A, C * A, D * A};
      17  float32_t expected4_2[4] = {A * B, B * B, C * B, D * B};
      18  float32_t expected4_3[4] = {A * C, B * C, C * C, D * C};
      19  float32_t expected4_4[4] = {A * D, B * D, C * D, D * D};
      20  float32_t _elemA = A;
      21  float32_t _elemB = B;
      22  float32_t _elemC = C;
      23  float32_t _elemD = D;
      24  
      25  #define AD (1234.5)
      26  #define BD (-0.0)
      27  #define CD (71.3)
      28  #define DD (-1024.4)
      29  float64_t expectedd2_1[2] = {AD * CD, BD * CD};
      30  float64_t expectedd2_2[2] = {AD * DD, BD * DD};
      31  float64_t _elemdC = CD;
      32  float64_t _elemdD = DD;
      33  
      34  
      35  #define AS (1024)
      36  #define BS (-31)
      37  #define CS (0)
      38  #define DS (655)
      39  int32_t expecteds2_1[2] = {AS * AS, BS * AS};
      40  int32_t expecteds2_2[2] = {AS * BS, BS * BS};
      41  int32_t expecteds4_1[4] = {AS * AS, BS * AS, CS * AS, DS * AS};
      42  int32_t expecteds4_2[4] = {AS * BS, BS * BS, CS * BS, DS * BS};
      43  int32_t expecteds4_3[4] = {AS * CS, BS * CS, CS * CS, DS * CS};
      44  int32_t expecteds4_4[4] = {AS * DS, BS * DS, CS * DS, DS * DS};
      45  int32_t _elemsA = AS;
      46  int32_t _elemsB = BS;
      47  int32_t _elemsC = CS;
      48  int32_t _elemsD = DS;
      49  
      50  #define AH ((int16_t) 0)
      51  #define BH ((int16_t) -32)
      52  #define CH ((int16_t) 102)
      53  #define DH ((int16_t) -51)
      54  #define EH ((int16_t) 71)
      55  #define FH ((int16_t) -91)
      56  #define GH ((int16_t) 48)
      57  #define HH ((int16_t) 255)
      58  int16_t expectedh4_1[4] = {AH * AH, BH * AH, CH * AH, DH * AH};
      59  int16_t expectedh4_2[4] = {AH * BH, BH * BH, CH * BH, DH * BH};
      60  int16_t expectedh4_3[4] = {AH * CH, BH * CH, CH * CH, DH * CH};
      61  int16_t expectedh4_4[4] = {AH * DH, BH * DH, CH * DH, DH * DH};
      62  int16_t expectedh8_1[8] = {AH * AH, BH * AH, CH * AH, DH * AH,
      63  			   EH * AH, FH * AH, GH * AH, HH * AH};
      64  int16_t expectedh8_2[8] = {AH * BH, BH * BH, CH * BH, DH * BH,
      65  			   EH * BH, FH * BH, GH * BH, HH * BH};
      66  int16_t expectedh8_3[8] = {AH * CH, BH * CH, CH * CH, DH * CH,
      67  			   EH * CH, FH * CH, GH * CH, HH * CH};
      68  int16_t expectedh8_4[8] = {AH * DH, BH * DH, CH * DH, DH * DH,
      69  			   EH * DH, FH * DH, GH * DH, HH * DH};
      70  int16_t expectedh8_5[8] = {AH * EH, BH * EH, CH * EH, DH * EH,
      71  			   EH * EH, FH * EH, GH * EH, HH * EH};
      72  int16_t expectedh8_6[8] = {AH * FH, BH * FH, CH * FH, DH * FH,
      73  			   EH * FH, FH * FH, GH * FH, HH * FH};
      74  int16_t expectedh8_7[8] = {AH * GH, BH * GH, CH * GH, DH * GH,
      75  			   EH * GH, FH * GH, GH * GH, HH * GH};
      76  int16_t expectedh8_8[8] = {AH * HH, BH * HH, CH * HH, DH * HH,
      77  			   EH * HH, FH * HH, GH * HH, HH * HH};
      78  int16_t _elemhA = AH;
      79  int16_t _elemhB = BH;
      80  int16_t _elemhC = CH;
      81  int16_t _elemhD = DH;
      82  int16_t _elemhE = EH;
      83  int16_t _elemhF = FH;
      84  int16_t _elemhG = GH;
      85  int16_t _elemhH = HH;
      86  
      87  #define AUS (1024)
      88  #define BUS (31)
      89  #define CUS (0)
      90  #define DUS (655)
      91  uint32_t expectedus2_1[2] = {AUS * AUS, BUS * AUS};
      92  uint32_t expectedus2_2[2] = {AUS * BUS, BUS * BUS};
      93  uint32_t expectedus4_1[4] = {AUS * AUS, BUS * AUS, CUS * AUS, DUS * AUS};
      94  uint32_t expectedus4_2[4] = {AUS * BUS, BUS * BUS, CUS * BUS, DUS * BUS};
      95  uint32_t expectedus4_3[4] = {AUS * CUS, BUS * CUS, CUS * CUS, DUS * CUS};
      96  uint32_t expectedus4_4[4] = {AUS * DUS, BUS * DUS, CUS * DUS, DUS * DUS};
      97  uint32_t _elemusA = AUS;
      98  uint32_t _elemusB = BUS;
      99  uint32_t _elemusC = CUS;
     100  uint32_t _elemusD = DUS;
     101  
     102  #define AUH ((uint16_t) 0)
     103  #define BUH ((uint16_t) 32)
     104  #define CUH ((uint16_t) 102)
     105  #define DUH ((uint16_t) 51)
     106  #define EUH ((uint16_t) 71)
     107  #define FUH ((uint16_t) 91)
     108  #define GUH ((uint16_t) 48)
     109  #define HUH ((uint16_t) 255)
     110  uint16_t expecteduh4_1[4] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH};
     111  uint16_t expecteduh4_2[4] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH};
     112  uint16_t expecteduh4_3[4] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH};
     113  uint16_t expecteduh4_4[4] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH};
     114  uint16_t expecteduh8_1[8] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH,
     115  			     EUH * AUH, FUH * AUH, GUH * AUH, HUH * AUH};
     116  uint16_t expecteduh8_2[8] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH,
     117  			     EUH * BUH, FUH * BUH, GUH * BUH, HUH * BUH};
     118  uint16_t expecteduh8_3[8] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH,
     119  			     EUH * CUH, FUH * CUH, GUH * CUH, HUH * CUH};
     120  uint16_t expecteduh8_4[8] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH,
     121  			     EUH * DUH, FUH * DUH, GUH * DUH, HUH * DUH};
     122  uint16_t expecteduh8_5[8] = {AUH * EUH, BUH * EUH, CUH * EUH, DUH * EUH,
     123  			     EUH * EUH, FUH * EUH, GUH * EUH, HUH * EUH};
     124  uint16_t expecteduh8_6[8] = {AUH * FUH, BUH * FUH, CUH * FUH, DUH * FUH,
     125  			     EUH * FUH, FUH * FUH, GUH * FUH, HUH * FUH};
     126  uint16_t expecteduh8_7[8] = {AUH * GUH, BUH * GUH, CUH * GUH, DUH * GUH,
     127  			     EUH * GUH, FUH * GUH, GUH * GUH, HUH * GUH};
     128  uint16_t expecteduh8_8[8] = {AUH * HUH, BUH * HUH, CUH * HUH, DUH * HUH,
     129  			     EUH * HUH, FUH * HUH, GUH * HUH, HUH * HUH};
     130  uint16_t _elemuhA = AUH;
     131  uint16_t _elemuhB = BUH;
     132  uint16_t _elemuhC = CUH;
     133  uint16_t _elemuhD = DUH;
     134  uint16_t _elemuhE = EUH;
     135  uint16_t _elemuhF = FUH;
     136  uint16_t _elemuhG = GUH;
     137  uint16_t _elemuhH = HUH;
     138  
     139  void
     140  check_v2sf (float32_t elemA, float32_t elemB)
     141  {
     142    int32_t indx;
     143    const float32_t vec32x2_buf[2] = {A, B};
     144    float32x2_t vec32x2_src = vld1_f32 (vec32x2_buf);
     145    float32_t vec32x2_res[2];
     146  
     147    vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemA));
     148  
     149    asm volatile ("" : : : "memory");
     150    for (indx = 0; indx < 2; indx++)
     151      if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_1[indx])
     152        abort ();
     153  
     154    vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemB));
     155  
     156    asm volatile ("" : : : "memory");
     157    for (indx = 0; indx < 2; indx++)
     158      if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_2[indx])
     159        abort ();
     160  
     161  /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
     162  }
     163  
     164  void
     165  check_v4sf (float32_t elemA, float32_t elemB, float32_t elemC, float32_t elemD)
     166  {
     167    int32_t indx;
     168    const float32_t vec32x4_buf[4] = {A, B, C, D};
     169    float32x4_t vec32x4_src = vld1q_f32 (vec32x4_buf);
     170    float32_t vec32x4_res[4];
     171  
     172    vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemA));
     173  
     174    asm volatile ("" : : : "memory");
     175    for (indx = 0; indx < 4; indx++)
     176      if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_1[indx])
     177        abort ();
     178  
     179    vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemB));
     180  
     181    asm volatile ("" : : : "memory");
     182    for (indx = 0; indx < 4; indx++)
     183      if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_2[indx])
     184        abort ();
     185  
     186    vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemC));
     187  
     188    asm volatile ("" : : : "memory");
     189    for (indx = 0; indx < 4; indx++)
     190      if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_3[indx])
     191        abort ();
     192  
     193    vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemD));
     194  
     195    asm volatile ("" : : : "memory");
     196    for (indx = 0; indx < 4; indx++)
     197      if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_4[indx])
     198        abort ();
     199  
     200  /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
     201  }
     202  
     203  void
     204  check_v2df (float64_t elemdC, float64_t elemdD)
     205  {
     206    int32_t indx;
     207    const float64_t vec64x2_buf[2] = {AD, BD};
     208    float64x2_t vec64x2_src = vld1q_f64 (vec64x2_buf);
     209    float64_t vec64x2_res[2];
     210  
     211    vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdC));
     212  
     213    asm volatile ("" : : : "memory");
     214    for (indx = 0; indx < 2; indx++)
     215      if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_1[indx])
     216        abort ();
     217  
     218    vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdD));
     219  
     220    asm volatile ("" : : : "memory");
     221    for (indx = 0; indx < 2; indx++)
     222      if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_2[indx])
     223        abort ();
     224  
     225  /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[0\\\]" 2 } } */
     226  }
     227  
     228  void
     229  check_v2si (int32_t elemsA, int32_t elemsB)
     230  {
     231    int32_t indx;
     232    const int32_t vecs32x2_buf[2] = {AS, BS};
     233    int32x2_t vecs32x2_src = vld1_s32 (vecs32x2_buf);
     234    int32_t vecs32x2_res[2];
     235  
     236    vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsA));
     237  
     238    asm volatile ("" : : : "memory");
     239    for (indx = 0; indx < 2; indx++)
     240      if (vecs32x2_res[indx] != expecteds2_1[indx])
     241        abort ();
     242  
     243    vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsB));
     244  
     245    asm volatile ("" : : : "memory");
     246    for (indx = 0; indx < 2; indx++)
     247      if (vecs32x2_res[indx] != expecteds2_2[indx])
     248        abort ();
     249  }
     250  
     251  void
     252  check_v2si_unsigned (uint32_t elemusA, uint32_t elemusB)
     253  {
     254    int indx;
     255    const uint32_t vecus32x2_buf[2] = {AUS, BUS};
     256    uint32x2_t vecus32x2_src = vld1_u32 (vecus32x2_buf);
     257    uint32_t vecus32x2_res[2];
     258  
     259    vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusA));
     260  
     261    asm volatile ("" : : : "memory");
     262    for (indx = 0; indx < 2; indx++)
     263      if (vecus32x2_res[indx] != expectedus2_1[indx])
     264        abort ();
     265  
     266    vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusB));
     267  
     268    asm volatile ("" : : : "memory");
     269    for (indx = 0; indx < 2; indx++)
     270      if (vecus32x2_res[indx] != expectedus2_2[indx])
     271        abort ();
     272  
     273  /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
     274  }
     275  
     276  void
     277  check_v4si (int32_t elemsA, int32_t elemsB, int32_t elemsC, int32_t elemsD)
     278  {
     279    int32_t indx;
     280    const int32_t vecs32x4_buf[4] = {AS, BS, CS, DS};
     281    int32x4_t vecs32x4_src = vld1q_s32 (vecs32x4_buf);
     282    int32_t vecs32x4_res[4];
     283  
     284    vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsA));
     285  
     286    asm volatile ("" : : : "memory");
     287    for (indx = 0; indx < 4; indx++)
     288      if (vecs32x4_res[indx] != expecteds4_1[indx])
     289        abort ();
     290  
     291    vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsB));
     292  
     293    asm volatile ("" : : : "memory");
     294    for (indx = 0; indx < 4; indx++)
     295      if (vecs32x4_res[indx] != expecteds4_2[indx])
     296        abort ();
     297  
     298    vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsC));
     299  
     300    asm volatile ("" : : : "memory");
     301    for (indx = 0; indx < 4; indx++)
     302      if (vecs32x4_res[indx] != expecteds4_3[indx])
     303        abort ();
     304  
     305    vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsD));
     306  
     307    asm volatile ("" : : : "memory");
     308    for (indx = 0; indx < 4; indx++)
     309      if (vecs32x4_res[indx] != expecteds4_4[indx])
     310        abort ();
     311  }
     312  
     313  void
     314  check_v4si_unsigned (uint32_t elemusA, uint32_t elemusB, uint32_t elemusC,
     315  		     uint32_t elemusD)
     316  {
     317    int indx;
     318    const uint32_t vecus32x4_buf[4] = {AUS, BUS, CUS, DUS};
     319    uint32x4_t vecus32x4_src = vld1q_u32 (vecus32x4_buf);
     320    uint32_t vecus32x4_res[4];
     321  
     322    vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusA));
     323  
     324    asm volatile ("" : : : "memory");
     325    for (indx = 0; indx < 4; indx++)
     326      if (vecus32x4_res[indx] != expectedus4_1[indx])
     327        abort ();
     328  
     329    vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusB));
     330  
     331    asm volatile ("" : : : "memory");
     332    for (indx = 0; indx < 4; indx++)
     333      if (vecus32x4_res[indx] != expectedus4_2[indx])
     334        abort ();
     335  
     336    vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusC));
     337  
     338    asm volatile ("" : : : "memory");
     339    for (indx = 0; indx < 4; indx++)
     340      if (vecus32x4_res[indx] != expectedus4_3[indx])
     341        abort ();
     342  
     343    vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusD));
     344  
     345    asm volatile ("" : : : "memory");
     346    for (indx = 0; indx < 4; indx++)
     347      if (vecus32x4_res[indx] != expectedus4_4[indx])
     348        abort ();
     349  
     350  /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 8 } } */
     351  }
     352  
     353  
     354  void
     355  check_v4hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD)
     356  {
     357    int32_t indx;
     358    const int16_t vech16x4_buf[4] = {AH, BH, CH, DH};
     359    int16x4_t vech16x4_src = vld1_s16 (vech16x4_buf);
     360    int16_t vech16x4_res[4];
     361  
     362    vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhA));
     363  
     364    asm volatile ("" : : : "memory");
     365    for (indx = 0; indx < 4; indx++)
     366      if (vech16x4_res[indx] != expectedh4_1[indx])
     367        abort ();
     368  
     369    vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhB));
     370  
     371    asm volatile ("" : : : "memory");
     372    for (indx = 0; indx < 4; indx++)
     373      if (vech16x4_res[indx] != expectedh4_2[indx])
     374        abort ();
     375  
     376    vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhC));
     377  
     378    asm volatile ("" : : : "memory");
     379    for (indx = 0; indx < 4; indx++)
     380      if (vech16x4_res[indx] != expectedh4_3[indx])
     381        abort ();
     382  
     383    vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhD));
     384  
     385    asm volatile ("" : : : "memory");
     386    for (indx = 0; indx < 4; indx++)
     387      if (vech16x4_res[indx] != expectedh4_4[indx])
     388        abort ();
     389  }
     390  
     391  void
     392  check_v4hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
     393  		     uint16_t elemuhD)
     394  {
     395    int indx;
     396    const uint16_t vecuh16x4_buf[4] = {AUH, BUH, CUH, DUH};
     397    uint16x4_t vecuh16x4_src = vld1_u16 (vecuh16x4_buf);
     398    uint16_t vecuh16x4_res[4];
     399  
     400    vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhA));
     401  
     402    asm volatile ("" : : : "memory");
     403    for (indx = 0; indx < 4; indx++)
     404      if (vecuh16x4_res[indx] != expecteduh4_1[indx])
     405        abort ();
     406  
     407    vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhB));
     408  
     409    asm volatile ("" : : : "memory");
     410    for (indx = 0; indx < 4; indx++)
     411      if (vecuh16x4_res[indx] != expecteduh4_2[indx])
     412        abort ();
     413  
     414    vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhC));
     415  
     416    asm volatile ("" : : : "memory");
     417    for (indx = 0; indx < 4; indx++)
     418      if (vecuh16x4_res[indx] != expecteduh4_3[indx])
     419        abort ();
     420  
     421    vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhD));
     422  
     423    asm volatile ("" : : : "memory");
     424    for (indx = 0; indx < 4; indx++)
     425      if (vecuh16x4_res[indx] != expecteduh4_4[indx])
     426        abort ();
     427  
     428  /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.4h, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 8 } } */
     429  }
     430  
     431  void
     432  check_v8hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD,
     433  	    int16_t elemhE, int16_t elemhF, int16_t elemhG, int16_t elemhH)
     434  {
     435    int32_t indx;
     436    const int16_t vech16x8_buf[8] = {AH, BH, CH, DH, EH, FH, GH, HH};
     437    int16x8_t vech16x8_src = vld1q_s16 (vech16x8_buf);
     438    int16_t vech16x8_res[8];
     439  
     440    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhA));
     441  
     442    asm volatile ("" : : : "memory");
     443    for (indx = 0; indx < 8; indx++)
     444      if (vech16x8_res[indx] != expectedh8_1[indx])
     445        abort ();
     446  
     447    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhB));
     448  
     449    asm volatile ("" : : : "memory");
     450    for (indx = 0; indx < 8; indx++)
     451      if (vech16x8_res[indx] != expectedh8_2[indx])
     452        abort ();
     453  
     454    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhC));
     455  
     456    asm volatile ("" : : : "memory");
     457    for (indx = 0; indx < 8; indx++)
     458      if (vech16x8_res[indx] != expectedh8_3[indx])
     459        abort ();
     460  
     461    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhD));
     462  
     463    asm volatile ("" : : : "memory");
     464    for (indx = 0; indx < 8; indx++)
     465      if (vech16x8_res[indx] != expectedh8_4[indx])
     466        abort ();
     467  
     468    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhE));
     469  
     470    asm volatile ("" : : : "memory");
     471    for (indx = 0; indx < 8; indx++)
     472      if (vech16x8_res[indx] != expectedh8_5[indx])
     473        abort ();
     474  
     475    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhF));
     476  
     477    asm volatile ("" : : : "memory");
     478    for (indx = 0; indx < 8; indx++)
     479      if (vech16x8_res[indx] != expectedh8_6[indx])
     480        abort ();
     481  
     482    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhG));
     483  
     484    asm volatile ("" : : : "memory");
     485    for (indx = 0; indx < 8; indx++)
     486      if (vech16x8_res[indx] != expectedh8_7[indx])
     487        abort ();
     488  
     489    vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhH));
     490  
     491    asm volatile ("" : : : "memory");
     492    for (indx = 0; indx < 8; indx++)
     493      if (vech16x8_res[indx] != expectedh8_8[indx])
     494        abort ();
     495  }
     496  
     497  void
     498  check_v8hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
     499  		     uint16_t elemuhD, uint16_t elemuhE, uint16_t elemuhF,
     500  		     uint16_t elemuhG, uint16_t elemuhH)
     501  {
     502    int indx;
     503    const uint16_t vecuh16x8_buf[8] = {AUH, BUH, CUH, DUH, EUH, FUH, GUH, HUH};
     504    uint16x8_t vecuh16x8_src = vld1q_u16 (vecuh16x8_buf);
     505    uint16_t vecuh16x8_res[8];
     506  
     507    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhA));
     508  
     509    asm volatile ("" : : : "memory");
     510    for (indx = 0; indx < 8; indx++)
     511      if (vecuh16x8_res[indx] != expecteduh8_1[indx])
     512        abort ();
     513  
     514    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhB));
     515  
     516    asm volatile ("" : : : "memory");
     517    for (indx = 0; indx < 8; indx++)
     518      if (vecuh16x8_res[indx] != expecteduh8_2[indx])
     519        abort ();
     520  
     521    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhC));
     522  
     523    asm volatile ("" : : : "memory");
     524    for (indx = 0; indx < 8; indx++)
     525      if (vecuh16x8_res[indx] != expecteduh8_3[indx])
     526        abort ();
     527  
     528    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhD));
     529  
     530    asm volatile ("" : : : "memory");
     531    for (indx = 0; indx < 8; indx++)
     532      if (vecuh16x8_res[indx] != expecteduh8_4[indx])
     533        abort ();
     534  
     535    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhE));
     536  
     537    asm volatile ("" : : : "memory");
     538    for (indx = 0; indx < 8; indx++)
     539      if (vecuh16x8_res[indx] != expecteduh8_5[indx])
     540        abort ();
     541  
     542    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhF));
     543  
     544    asm volatile ("" : : : "memory");
     545    for (indx = 0; indx < 8; indx++)
     546      if (vecuh16x8_res[indx] != expecteduh8_6[indx])
     547        abort ();
     548  
     549    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhG));
     550  
     551    asm volatile ("" : : : "memory");
     552    for (indx = 0; indx < 8; indx++)
     553      if (vecuh16x8_res[indx] != expecteduh8_7[indx])
     554        abort ();
     555  
     556    vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhH));
     557  
     558    asm volatile ("" : : : "memory");
     559    for (indx = 0; indx < 8; indx++)
     560      if (vecuh16x8_res[indx] != expecteduh8_8[indx])
     561        abort ();
     562  
     563  /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 16 } } */
     564  }
     565  
     566  int
     567  main (void)
     568  {
     569    check_v2sf (_elemA, _elemB);
     570    check_v4sf (_elemA, _elemB, _elemC, _elemD);
     571    check_v2df (_elemdC, _elemdD);
     572    check_v2si (_elemsA, _elemsB);
     573    check_v4si (_elemsA, _elemsB, _elemsC, _elemsD);
     574    check_v4hi (_elemhA, _elemhB, _elemhC, _elemhD);
     575    check_v8hi (_elemhA, _elemhB, _elemhC, _elemhD,
     576  	      _elemhE, _elemhF, _elemhG, _elemhH);
     577    check_v2si_unsigned (_elemusA, _elemusB);
     578    check_v4si_unsigned (_elemusA, _elemusB, _elemusC, _elemusD);
     579    check_v4hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD);
     580    check_v8hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD,
     581  		       _elemuhE, _elemuhF, _elemuhG, _elemuhH);
     582  
     583    return 0;
     584  }
     585