/* { dg-do compile } */
/* { dg-options "-mavx512f -mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */

/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
/* { dg-final { scan-assembler-times "(?:vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}|blend\[a-z]+\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+)(?:\n|\[ \\t\]+#)"  4 } } */
/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */

/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
/* { dg-final { scan-assembler-times "(?:vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}|(?:blend\[a-z]+|movsd)\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+)(?:\n|\[ \\t\]+#)"  4 } } */

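/* Each intrinsic below is called with four masks: zero (the
   pass-through operand is returned unchanged), all ones (a plain
   move or load), a contiguous run of low mask bits (a masked move or
   blend), and a non-contiguous mask, which is the only case that
   should need a real expand instruction.  The counts in the
   scan-assembler-times directives above depend on this.  */
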
#include <immintrin.h>

int *pi32;
long long *pi64;
double *pd;
float *pf;
volatile __m256i xi32, xi64;
volatile __m256d xd;
volatile __m256 xf;

volatile __m128i xi32_128, xi64_128;
volatile __m128d xd_128;
volatile __m128 xf_128;

void extern
avx512vl_test (void)
{
  xi32 = _mm256_mask_expand_epi32 (xi32, 0, xi32);
  xi32 = _mm256_mask_expand_epi32 (xi32, -1, xi32);
  xi32 = _mm256_mask_expand_epi32 (xi32, (1 << 4) - 1, xi32);
  xi32 = _mm256_mask_expand_epi32 (xi32, (1 << 4) + 1, xi32);

  xi32 = _mm256_mask_expandloadu_epi32 (xi32, 0, pi32);
  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 8) - 1, pi32);
  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 6) - 1, pi32);
  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 6) + 3, pi32);

  xi64 = _mm256_mask_expand_epi64 (xi64, 0, xi64);
  xi64 = _mm256_mask_expand_epi64 (xi64, -1, xi64);
  xi64 = _mm256_mask_expand_epi64 (xi64, (1 << 3) - 1, xi64);
  xi64 = _mm256_mask_expand_epi64 (xi64, (1 << 3) + 2, xi64);

  xi64 = _mm256_mask_expandloadu_epi64 (xi64, 0, pi64);
  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 4) - 1, pi64);
  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 2) - 1, pi64);
  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 2), pi64);

  xf = _mm256_mask_expand_ps (xf, 0, xf);
  xf = _mm256_mask_expand_ps (xf, (1 << 8) - 1, xf);
  xf = _mm256_mask_expand_ps (xf, (1 << 6) - 1, xf);
  xf = _mm256_mask_expand_ps (xf, (1 << 6) + 3, xf);

  xf = _mm256_mask_expandloadu_ps (xf, 0, pf);
  xf = _mm256_mask_expandloadu_ps (xf, -1, pf);
  xf = _mm256_mask_expandloadu_ps (xf, (1 << 7) - 1, pf);
  xf = _mm256_mask_expandloadu_ps (xf, (1 << 7) + 5, pf);

  xd = _mm256_mask_expand_pd (xd, 0, xd);
  xd = _mm256_mask_expand_pd (xd, (1 << 4) - 1, xd);
  xd = _mm256_mask_expand_pd (xd, (1 << 2) - 1, xd);
  xd = _mm256_mask_expand_pd (xd, (1 << 2), xd);

  xd = _mm256_mask_expandloadu_pd (xd, 0, pd);
  xd = _mm256_mask_expandloadu_pd (xd, -1, pd);
  xd = _mm256_mask_expandloadu_pd (xd, (1 << 2) - 1, pd);
  xd = _mm256_mask_expandloadu_pd (xd, (1 << 2), pd);

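  /* The 128-bit variants follow the same four-mask pattern.  */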
  xi32_128 = _mm_mask_expand_epi32 (xi32_128, 0, xi32_128);
  xi32_128 = _mm_mask_expand_epi32 (xi32_128, -1, xi32_128);
  xi32_128 = _mm_mask_expand_epi32 (xi32_128, (1 << 3) - 1, xi32_128);
  xi32_128 = _mm_mask_expand_epi32 (xi32_128, (1 << 3) + 1, xi32_128);

  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, 0, pi32);
  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 4) - 1, pi32);
  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 2) - 1, pi32);
  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 1) + 3, pi32);

  xi64_128 = _mm_mask_expand_epi64 (xi64_128, 0, xi64_128);
  xi64_128 = _mm_mask_expand_epi64 (xi64_128, -1, xi64_128);
  xi64_128 = _mm_mask_expand_epi64 (xi64_128, (1 << 1) - 1, xi64_128);
  xi64_128 = _mm_mask_expand_epi64 (xi64_128, 2, xi64_128);

  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 0, pi64);
  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 3, pi64);
  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 1, pi64);
  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 2, pi64);

  xf_128 = _mm_mask_expand_ps (xf_128, 0, xf_128);
  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 4) - 1, xf_128);
  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 3) - 1, xf_128);
  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 2), xf_128);

  xf_128 = _mm_mask_expandloadu_ps (xf_128, 0, pf);
  xf_128 = _mm_mask_expandloadu_ps (xf_128, -1, pf);
  xf_128 = _mm_mask_expandloadu_ps (xf_128, (1 << 3) - 1, pf);
  xf_128 = _mm_mask_expandloadu_ps (xf_128, (1 << 1), pf);

  xd_128 = _mm_mask_expand_pd (xd_128, 0, xd_128);
  xd_128 = _mm_mask_expand_pd (xd_128, (1 << 2) - 1, xd_128);
  xd_128 = _mm_mask_expand_pd (xd_128, 1, xd_128);
  xd_128 = _mm_mask_expand_pd (xd_128, 2, xd_128);

  xd_128 = _mm_mask_expandloadu_pd (xd_128, 0, pd);
  xd_128 = _mm_mask_expandloadu_pd (xd_128, -1, pd);
  xd_128 = _mm_mask_expandloadu_pd (xd_128, 1, pd);
  xd_128 = _mm_mask_expandloadu_pd (xd_128, 2, pd);
}