/* File: pr97642-2.c */
/* PR target/97642 */
/* { dg-do run { target *-*-linux* } } */
/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512dq } */
/* { dg-require-effective-target avx512bw } */
#include <assert.h>
#include <immintrin.h>
#include <stdint.h>
#include <sys/mman.h>

/* Number of low 32-bit elements selected by the load/store masks used in
   this file; every mask is built as (1 << N) - 1.  */
#define N 5
// Faults with GCC because usage of vpblendd __m256i __attribute__((noinline)) mask_load(uint32_t * arr) { __m256i tmp; return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr); }
// Faults __m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) { __m256i tmp = _mm256_set1_epi64x(0); asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t" : [ tmp ] "+x"(tmp) : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) :); return tmp; }
// Does not fault __m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) { __m256i tmp; asm volatile( "movb %[m], %%al\n\t" "kmovb %%eax, %%k1\n\t" "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t" : [ tmp ] "+x"(tmp) : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) : "eax", "k1"); return tmp; }
/* Store the low N 32-bit elements of V to ARR; the remaining elements of
   the destination are selected out by the mask.

   The intrinsic returns void, and ISO C does not allow `return expr;` in a
   void function, so the call is a plain statement.  */
void __attribute__((noinline))
mask_store(uint32_t *arr, __m256i v)
{
  _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
}
/* Number of 4096-byte pages mapped by main.  */
#define NPAGES (2)

/* Index, in 32-bit elements, of the last N elements of a 4096-byte page
   (1024 elements per page).  */
#define END_OF_PAGE (1024 - N)

/* Load routine exercised by main; may be overridden on the command line.  */
#ifndef LOAD_METHOD
#define LOAD_METHOD mask_load // mask_load_asm does not fault
#endif
/* Driver for PR target/97642.

   Maps NPAGES pages, makes every second page inaccessible, then loads and
   stores the last N dwords of the preceding page with LOAD_METHOD and
   mask_store.  A full 32-byte access at that address would run into the
   PROT_NONE page, so the program only completes if the masked-off elements
   are not touched.

   Returns 0 without running anything when the CPU lacks AVX-512DQ/VL/BW.
   Aborts if the mapping or the protection change cannot be set up.  */
int main(void)
{
  if (!(__builtin_cpu_supports("avx512dq")
        && __builtin_cpu_supports("avx512vl")
        && __builtin_cpu_supports("avx512bw")))
    return 0;

  enum { PAGE_BYTES = 4096, PAGE_WORDS = 1024 };

  /* Check the raw result before using it: mmap reports failure as
     MAP_FAILED, not NULL.  */
  void *map = mmap(NULL, NPAGES * PAGE_BYTES, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (map == MAP_FAILED)
    __builtin_abort();

  uint32_t *addr = (uint32_t *)map;

  for (uint32_t i = 0; i < NPAGES; i += 2) {
    uint32_t page_offset = PAGE_WORDS * i + END_OF_PAGE;
    uint32_t next_page_offset = PAGE_WORDS * (i + 1);

    /* Keep the call outside assert() so it still happens when NDEBUG is
       defined.  */
    if (mprotect(addr + next_page_offset, PAGE_BYTES, PROT_NONE) != 0)
      __builtin_abort();

    mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
  }

  return 0;
}
|