/* PR target/97642 */
/* { dg-do run { target *-*-linux* } } */
/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512dq } */
/* { dg-require-effective-target avx512bw } */

#include <assert.h>
#include <immintrin.h>
#include <stdint.h>
#include <sys/mman.h>

#define N 5

// Faults with GCC because the masked load gets expanded through vpblendd,
// which loads all 256 bits instead of only the N selected elements.
__m256i __attribute__((noinline)) mask_load(uint32_t * arr) {
  __m256i tmp = _mm256_setzero_si256();
  return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr);
}
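
// A zero-masking variant of mask_load, kept as a comment so the original
// reproducer stays unchanged; _mm256_maskz_loadu_epi32 takes only the mask
// and the address and zeroes the masked-off elements instead of merging:
//
//   __m256i maskz_load(uint32_t * arr) {
//     return _mm256_maskz_loadu_epi32((1 << N) - 1, arr);
//   }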

// Faults: vpblendd with a memory operand always loads the full 256 bits
// before blending, regardless of the immediate mask.
__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) {
  __m256i tmp = _mm256_set1_epi64x(0);
  asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t"
               : [ tmp ] "+x"(tmp)
               : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
               : "memory");
  return tmp;
}
// Does not fault: vmovdqu32 under a %k1 mask with zeroing touches only the N
// selected dwords, so nothing past the mask is read.
__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) {
  __m256i tmp;
  asm volatile("movb %[m], %%al\n\t"
               "kmovb %%eax, %%k1\n\t"
               "vmovdqu32 (%[arr]), %[tmp] %{%%k1%} %{z%}\n\t"
               : [ tmp ] "=x"(tmp)
               : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
               : "eax", "k1", "memory");
  return tmp;
}

void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) {
  _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
}

#define NPAGES      (2)
#define END_OF_PAGE (1024 - N)
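// A 4 KiB page holds 1024 uint32_t elements, so an N-element access starting
// at END_OF_PAGE ends on the last dword of its page; reading even one element
// more crosses into the following page.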

#ifndef LOAD_METHOD
#define LOAD_METHOD mask_load // mask_load_asm does not fault
#endif

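// Test outline: mmap NPAGES writable pages, make the page after the accessed
// elements PROT_NONE, then run an N-element masked load and masked store that
// end exactly at the page boundary.  A correct masked load touches only the
// writable page; an expansion that loads the full 256 bits (e.g. through
// vpblendd) reads into the protected page and the test dies with SIGSEGV.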
int
main() {
  if (!(__builtin_cpu_supports ("avx512dq")
        && __builtin_cpu_supports ("avx512vl")
        && __builtin_cpu_supports ("avx512bw")))
    return 0;

  uint32_t * addr =
    (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  assert(addr != MAP_FAILED);

  for (uint32_t i = 0; i < NPAGES; i += 2) {

    uint32_t page_offset      = 1024 * i + END_OF_PAGE; // last N dwords of page i
    uint32_t next_page_offset = 1024 * (i + 1);         // first dword of page i + 1

    // Protect the next page, then do the masked load and store right at the
    // boundary; any access that reads past the N selected elements crosses
    // into the protected page and faults.
    assert(!mprotect(addr + next_page_offset, 4096, PROT_NONE));
    mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
  }
}