1  /* { dg-do compile { target { ! ia32 } } } */
       2  /* { dg-options "-O2 -mavx512vl -masm=att" } */
       3  
       4  typedef float V1 __attribute__((vector_size (16)));  /* 16-byte float vector: data type of f1-f8.  */
       5  typedef float V2 __attribute__((vector_size (32)));  /* 32-byte float vector: data type of f9-f16.  */
       6  typedef int V4 __attribute__((vector_size (16)));  /* 16-byte int vector: shuffle-mask type paired with V1.  */
       7  typedef int V5 __attribute__((vector_size (32)));  /* 32-byte int vector: shuffle-mask type paired with V2.  */
       8  
       9  void
      10  f1 (V1 x)
      11  {
            /* Codegen case: replicate element 0 of a V1 value that is held in
               a register variable.  Nothing is returned; the function exists
               so its generated assembly can be inspected by the dg-final
               scan-assembler-times directives at the end of this file.  */
            /* Explicit register variable: 'a' is bound to the register named
               xmm16.  */
      12    register V1 a __asm ("xmm16");
      13    a = x;
            /* Empty asm with 'a' as a read-write ("+v") operand.  The asm both
               reads and may modify 'a', presumably so the shuffle below takes
               its input from the register rather than directly from the
               incoming argument.  */
      14    asm volatile ("" : "+v" (a));
            /* Every mask index is 0, so each result lane is a[0].  */
      15    a = __builtin_shuffle (a, (V4) { 0, 0, 0, 0 });
            /* This asm reads 'a', so the shuffle result is used.  */
      16    asm volatile ("" : "+v" (a));
      17  }
      18  
      19  void
      20  f2 (V1 x)
      21  {
            /* Codegen case: replicate element 1 of a V1 value that is held in
               a register variable.  Only the emitted assembly matters; it is
               inspected by the dg-final directives at the end of this file.  */
      22    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
      23    a = x;
            /* Empty asm taking 'a' as a read-write operand, placed between
               the copy and the shuffle.  */
      24    asm volatile ("" : "+v" (a));
            /* Every mask index is 1, so each result lane is a[1].  */
      25    a = __builtin_shuffle (a, (V4) { 1, 1, 1, 1 });
            /* Reads 'a', so the shuffle result is used.  */
      26    asm volatile ("" : "+v" (a));
      27  }
      28  
      29  void
      30  f3 (V1 x)
      31  {
            /* Codegen case: replicate element 2 of a V1 value that is held in
               a register variable.  Only the emitted assembly matters; it is
               inspected by the dg-final directives at the end of this file.  */
      32    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
      33    a = x;
            /* Empty asm taking 'a' as a read-write operand, placed between
               the copy and the shuffle.  */
      34    asm volatile ("" : "+v" (a));
            /* Every mask index is 2, so each result lane is a[2].  */
      35    a = __builtin_shuffle (a, (V4) { 2, 2, 2, 2 });
            /* Reads 'a', so the shuffle result is used.  */
      36    asm volatile ("" : "+v" (a));
      37  }
      38  
      39  void
      40  f4 (V1 x)
      41  {
            /* Codegen case: replicate element 3 of a V1 value that is held in
               a register variable.  Only the emitted assembly matters; it is
               inspected by the dg-final directives at the end of this file.  */
      42    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
      43    a = x;
            /* Empty asm taking 'a' as a read-write operand, placed between
               the copy and the shuffle.  */
      44    asm volatile ("" : "+v" (a));
            /* Every mask index is 3, so each result lane is a[3].  */
      45    a = __builtin_shuffle (a, (V4) { 3, 3, 3, 3 });
            /* Reads 'a', so the shuffle result is used.  */
      46    asm volatile ("" : "+v" (a));
      47  }
      48  
      49  void
      50  f5 (V1 *x)
      51  {
            /* Codegen case: replicate element 0 of the V1 value that 'x'
               points to, with the result placed in a register variable.
               Unlike f1-f4 the shuffle input is read through a pointer.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
      52    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
            /* Every mask index is 0, so each result lane is (*x)[0].  */
      53    a = __builtin_shuffle (*x, (V4) { 0, 0, 0, 0 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
      54    asm volatile ("" : "+v" (a));
      55  }
      56  
      57  void
      58  f6 (V1 *x)
      59  {
            /* Codegen case: replicate element 1 of the V1 value that 'x'
               points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
      60    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
            /* Every mask index is 1, so each result lane is (*x)[1].  */
      61    a = __builtin_shuffle (*x, (V4) { 1, 1, 1, 1 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
      62    asm volatile ("" : "+v" (a));
      63  }
      64  
      65  void
      66  f7 (V1 *x)
      67  {
            /* Codegen case: replicate element 2 of the V1 value that 'x'
               points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
      68    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
            /* Every mask index is 2, so each result lane is (*x)[2].  */
      69    a = __builtin_shuffle (*x, (V4) { 2, 2, 2, 2 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
      70    asm volatile ("" : "+v" (a));
      71  }
      72  
      73  void
      74  f8 (V1 *x)
      75  {
            /* Codegen case: replicate element 3 of the V1 value that 'x'
               points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
      76    register V1 a __asm ("xmm16");  /* bound to the register named xmm16 */
            /* Every mask index is 3, so each result lane is (*x)[3].  */
      77    a = __builtin_shuffle (*x, (V4) { 3, 3, 3, 3 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
      78    asm volatile ("" : "+v" (a));
      79  }
      80  
      81  void
      82  f9 (V2 x)
      83  {
            /* Codegen case: replicate element 0 of a 32-byte V2 value that is
               held in a register variable.  Nothing is returned; the emitted
               assembly is inspected by the dg-final directives at the end of
               this file.  */
            /* 'a' is a 32-byte vector bound through the register name xmm16.
               NOTE(review): the dg-final patterns for the V2 cases look for
               %ymm16, presumably the 256-bit form of that register.  */
      84    register V2 a __asm ("xmm16");
      85    a = x;
            /* Empty asm with 'a' as a read-write ("+v") operand, placed
               between the copy and the shuffle.  */
      86    asm volatile ("" : "+v" (a));
            /* Every one of the eight mask indices is 0, so each result lane
               is a[0].  */
      87    a = __builtin_shuffle (a, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
            /* Reads 'a', so the shuffle result is used.  */
      88    asm volatile ("" : "+v" (a));
      89  }
      90  
      91  void
      92  f10 (V2 x)
      93  {
            /* Codegen case: replicate element 1 of a 32-byte V2 value that is
               held in a register variable.  Only the emitted assembly
               matters; it is inspected by the dg-final directives at the end
               of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
      94    register V2 a __asm ("xmm16");
      95    a = x;
            /* Empty asm with 'a' as a read-write operand, placed between the
               copy and the shuffle.  */
      96    asm volatile ("" : "+v" (a));
            /* Every one of the eight mask indices is 1, so each result lane
               is a[1].  */
      97    a = __builtin_shuffle (a, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
            /* Reads 'a', so the shuffle result is used.  */
      98    asm volatile ("" : "+v" (a));
      99  }
     100  
     101  void
     102  f11 (V2 x)
     103  {
            /* Codegen case: replicate element 4 of a 32-byte V2 value that is
               held in a register variable.  Only the emitted assembly
               matters; it is inspected by the dg-final directives at the end
               of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     104    register V2 a __asm ("xmm16");
     105    a = x;
            /* Empty asm with 'a' as a read-write operand, placed between the
               copy and the shuffle.  */
     106    asm volatile ("" : "+v" (a));
            /* Every one of the eight mask indices is 4, so each result lane
               is a[4] (an element from the second half of the eight).  */
     107    a = __builtin_shuffle (a, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
            /* Reads 'a', so the shuffle result is used.  */
     108    asm volatile ("" : "+v" (a));
     109  }
     110  
     111  void
     112  f12 (V2 x)
     113  {
            /* Codegen case: replicate element 5 of a 32-byte V2 value that is
               held in a register variable.  Only the emitted assembly
               matters; it is inspected by the dg-final directives at the end
               of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     114    register V2 a __asm ("xmm16");
     115    a = x;
            /* Empty asm with 'a' as a read-write operand, placed between the
               copy and the shuffle.  */
     116    asm volatile ("" : "+v" (a));
            /* Every one of the eight mask indices is 5, so each result lane
               is a[5] (an element from the second half of the eight).  */
     117    a = __builtin_shuffle (a, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
            /* Reads 'a', so the shuffle result is used.  */
     118    asm volatile ("" : "+v" (a));
     119  }
     120  
     121  void
     122  f13 (V2 *x)
     123  {
            /* Codegen case: replicate element 0 of the 32-byte V2 value that
               'x' points to, with the result placed in a register variable.
               Unlike f9-f12 the shuffle input is read through a pointer.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     124    register V2 a __asm ("xmm16");
            /* Every one of the eight mask indices is 0, so each result lane
               is (*x)[0].  */
     125    a = __builtin_shuffle (*x, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
     126    asm volatile ("" : "+v" (a));
     127  }
     128  
     129  void
     130  f14 (V2 *x)
     131  {
            /* Codegen case: replicate element 1 of the 32-byte V2 value that
               'x' points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     132    register V2 a __asm ("xmm16");
            /* Every one of the eight mask indices is 1, so each result lane
               is (*x)[1].  */
     133    a = __builtin_shuffle (*x, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
     134    asm volatile ("" : "+v" (a));
     135  }
     136  
     137  void
     138  f15 (V2 *x)
     139  {
            /* Codegen case: replicate element 4 of the 32-byte V2 value that
               'x' points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     140    register V2 a __asm ("xmm16");
            /* Every one of the eight mask indices is 4, so each result lane
               is (*x)[4] (an element from the second half of the eight).  */
     141    a = __builtin_shuffle (*x, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
     142    asm volatile ("" : "+v" (a));
     143  }
     144  
     145  void
     146  f16 (V2 *x)
     147  {
            /* Codegen case: replicate element 5 of the 32-byte V2 value that
               'x' points to, with the result placed in a register variable.
               Only the emitted assembly matters; it is inspected by the
               dg-final directives at the end of this file.  */
            /* 32-byte vector bound through the register name xmm16.  */
     148    register V2 a __asm ("xmm16");
            /* Every one of the eight mask indices is 5, so each result lane
               is (*x)[5] (an element from the second half of the eight).  */
     149    a = __builtin_shuffle (*x, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
            /* Empty asm with 'a' as a read-write operand: it reads 'a', so
               the shuffle result is used.  */
     150    asm volatile ("" : "+v" (a));
     151  }
     152  
     153  /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%xmm16" 4 } } */
     154  /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
     155  /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 3 } } */
     156  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
     157  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
     158  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
     159  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$255\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
     160  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
     161  /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
     162  /* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */