/* mask_load_slp_1.c */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
#include <stdint.h>
/* Define mask_slp_<TYPE_COND>_2_<ALT_VAL> (X, Y, Z, N).

   The loop steps I from 0 to N in strides of 2 and writes X[I] and
   X[I + 1].  For each written element, Z is only read when the
   corresponding element of Y is nonzero; otherwise the even element
   gets the constant 1 and the odd element gets ALT_VAL.

   TYPE_COND is the element type of Z and ALT_VAL is pasted into the
   function name, so it must be a token that can form part of an
   identifier.  The noinline and noclone attributes keep every
   instantiation as its own out-of-line function.  */
#define MASK_SLP_2(TYPE_COND, ALT_VAL)                                  \
void __attribute__ ((noinline, noclone))                                \
mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y,   \
                                    TYPE_COND *restrict z, int n)       \
{                                                                       \
  for (int i = 0; i < n; i += 2)                                        \
    {                                                                   \
      x[i] = y[i] ? z[i] : 1;                                           \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;                         \
    }                                                                   \
}
/* Define mask_slp_<TYPE_COND>_4_<ALT_VAL> (X, Y, Z, N).

   The loop steps I from 0 to N in strides of 4 and writes X[I] to
   X[I + 3].  For each written element, Z is only read when the
   corresponding element of Y is nonzero; otherwise elements at even
   offsets get the constant 1 and elements at odd offsets get ALT_VAL.

   TYPE_COND is the element type of Z and ALT_VAL is pasted into the
   function name, so it must be a token that can form part of an
   identifier.  The noinline and noclone attributes keep every
   instantiation as its own out-of-line function.  */
#define MASK_SLP_4(TYPE_COND, ALT_VAL)                                  \
void __attribute__ ((noinline, noclone))                                \
mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y,   \
                                    TYPE_COND *restrict z, int n)       \
{                                                                       \
  for (int i = 0; i < n; i += 4)                                        \
    {                                                                   \
      x[i] = y[i] ? z[i] : 1;                                           \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;                         \
      x[i + 2] = y[i + 2] ? z[i + 2] : 1;                               \
      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;                         \
    }                                                                   \
}
/* Define mask_slp_<TYPE_COND>_8_<ALT_VAL> (X, Y, Z, N).

   The loop steps I from 0 to N in strides of 8 and writes X[I] to
   X[I + 7].  For each written element, Z is only read when the
   corresponding element of Y is nonzero; otherwise elements at even
   offsets get the constant 1 and elements at odd offsets get ALT_VAL.

   TYPE_COND is the element type of Z and ALT_VAL is pasted into the
   function name, so it must be a token that can form part of an
   identifier.  The noinline and noclone attributes keep every
   instantiation as its own out-of-line function.  */
#define MASK_SLP_8(TYPE_COND, ALT_VAL)                                  \
void __attribute__ ((noinline, noclone))                                \
mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y,   \
                                    TYPE_COND *restrict z, int n)       \
{                                                                       \
  for (int i = 0; i < n; i += 8)                                        \
    {                                                                   \
      x[i] = y[i] ? z[i] : 1;                                           \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;                         \
      x[i + 2] = y[i + 2] ? z[i + 2] : 1;                               \
      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;                         \
      x[i + 4] = y[i + 4] ? z[i + 4] : 1;                               \
      x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL;                         \
      x[i + 6] = y[i + 6] ? z[i + 6] : 1;                               \
      x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL;                         \
    }                                                                   \
}
/* Define mask_slp_<TYPE_COND>_FAIL (X, Y, Z, N).

   The loop steps I from 0 to N in strides of 2 and writes X[I] and
   X[I + 1].  The even element is Z[I] when Y[I] is nonzero and the
   constant 1 otherwise.  The odd element is Z[I + 1] when Y[I + 1] is
   nonzero; otherwise it is X[Z[I + 1]], so Z[I + 1] is read on both
   arms and the fallback is an indexed read of X rather than a constant.

   TYPE_COND is the element type of Z.  The noinline and noclone
   attributes keep every instantiation as its own out-of-line function.
   NOTE(review): the FAIL suffix presumably marks a loop that is not
   expected to use the same SLP strategy as the other macros; confirm
   against the vectorizer's behaviour.  */
#define MASK_SLP_FAIL(TYPE_COND)                                        \
void __attribute__ ((noinline, noclone))                                \
mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y,          \
                             TYPE_COND *restrict z, int n)              \
{                                                                       \
  for (int i = 0; i < n; i += 2)                                        \
    {                                                                   \
      x[i] = y[i] ? z[i] : 1;                                           \
      x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]];                     \
    }                                                                   \
}
/* Groups of two elements, for each element type of Z and for an
   ALT_VAL of 1 and of 2.  */
MASK_SLP_2(int8_t, 1)
MASK_SLP_2(int8_t, 2)
MASK_SLP_2(int, 1)
MASK_SLP_2(int, 2)
MASK_SLP_2(int64_t, 1)
MASK_SLP_2(int64_t, 2)

/* Groups of four elements, same type and ALT_VAL combinations.  */
MASK_SLP_4(int8_t, 1)
MASK_SLP_4(int8_t, 2)
MASK_SLP_4(int, 1)
MASK_SLP_4(int, 2)
MASK_SLP_4(int64_t, 1)
MASK_SLP_4(int64_t, 2)

/* Groups of eight elements, same type and ALT_VAL combinations.  */
MASK_SLP_8(int8_t, 1)
MASK_SLP_8(int8_t, 2)
MASK_SLP_8(int, 1)
MASK_SLP_8(int, 2)
MASK_SLP_8(int64_t, 1)
MASK_SLP_8(int64_t, 2)

/* The variant whose odd element falls back to an indexed read of X,
   once per element type of Z.  */
MASK_SLP_FAIL(int8_t)
MASK_SLP_FAIL(int)
MASK_SLP_FAIL(int64_t)
/* { dg-final { scan-assembler-not {\tld2w\t} } } */
/* { dg-final { scan-assembler-not {\tst2w\t} } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */