/* memset-corner-cases.c: expected assembly for constant-size
   __builtin_memset calls at the boundaries of inline expansion.  */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-require-effective-target lp64 } */
#include <stdint.h>
/* One byte variable set should be scalar: a single STRB of the fill
   value, with no vector instructions and no call to memset.
**set1byte:
**	strb	w1, \[x0\]
**	ret
*/
void __attribute__((__noinline__))
set1byte (int64_t *src, char c)
{
  /* Constant length of 1 with a non-constant fill byte.  */
  __builtin_memset (src, c, 1);
}
/* Special cases for setting 0.  */
/* 1-byte should be STRB with wzr: the zero register is stored directly,
   so no register has to be loaded with the fill value first.
**set0byte:
**	strb	wzr, \[x0\]
**	ret
*/
void __attribute__((__noinline__))
set0byte (int64_t *src)
{
  /* Constant length of 1 with a constant zero fill.  */
  __builtin_memset (src, 0, 1);
}
/* 35bytes would become 4 scalar instructions.  So favour NEON.
   The expected sequence zeroes a vector register, writes bytes 0-31 with
   one STP of Q registers, and covers bytes 32-34 with a 4-byte store at
   offset 31 that overlaps the last byte already written.
**set0neon:
**	movi	v0.4s, 0
**	stp	q0, q0, \[x0\]
**	str	wzr, \[x0, 31\]
**	ret
*/
void __attribute__((__noinline__))
set0neon (int64_t *src)
{
  /* 35 = 32 + 3: not a multiple of any single store width.  */
  __builtin_memset (src, 0, 35);
}
/* 36bytes should be scalar however.
   Two STPs of xzr cover bytes 0-31 and one 4-byte store of wzr covers
   bytes 32-35, so no vector register is needed.
**set0scalar:
**	stp	xzr, xzr, \[x0\]
**	stp	xzr, xzr, \[x0, 16\]
**	str	wzr, \[x0, 32\]
**	ret
*/
void __attribute__((__noinline__))
set0scalar (int64_t *src)
{
  /* 36 = 16 + 16 + 4: three non-overlapping scalar stores.  */
  __builtin_memset (src, 0, 36);
}
/* 256-bytes expanded
   The fill byte is broadcast to all 16 lanes of v0 once, then eight STPs
   of Q registers (32 bytes each) cover the whole region inline.
**set256byte:
**	dup	v0.16b, w1
**	stp	q0, q0, \[x0\]
**	stp	q0, q0, \[x0, 32\]
**	stp	q0, q0, \[x0, 64\]
**	stp	q0, q0, \[x0, 96\]
**	stp	q0, q0, \[x0, 128\]
**	stp	q0, q0, \[x0, 160\]
**	stp	q0, q0, \[x0, 192\]
**	stp	q0, q0, \[x0, 224\]
**	ret
*/
void __attribute__((__noinline__))
set256byte (int64_t *src, char c)
{
  /* 256 is the largest size this file expects to be expanded inline.  */
  __builtin_memset (src, c, 256);
}
/* More than 256 bytes goes to memset
   The length (257) and the fill byte ('c' == 99) are loaded into the
   argument registers and the function tail-calls the library memset.
**set257byte:
**	mov	x2, 257
**	mov	w1, 99
**	b	memset
*/
void __attribute__((__noinline__))
set257byte (int64_t *src)
{
  /* One byte past the size the previous test expects to be inlined.  */
  __builtin_memset (src, 'c', 257);
}
/* { dg-final { check-function-bodies "**" "" "" } } */