Viewing file: vldst24q_reg_offset.c (10 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
/* { dg-require-effective-target arm_v8_1m_mve_ok } */ /* { dg-add-options arm_v8_1m_mve } */ /* { dg-additional-options "-O1" } */ /* { dg-final { check-function-bodies "**" "" } } */
#include "arm_mve.h"
#ifdef __cplusplus extern "C" { #endif
/* **test: ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test(const uint8_t * in, uint8_t * out, int width) { uint8x16x2_t rg = vld2q(in); uint8x16x2_t gb = vld2q(in + width); vst2q (out, rg); vst2q (out + width, gb); }
/* **test2: ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test2(const uint8_t * in, uint8_t * out) { uint8x16x2_t rg = vld2q(in); uint8x16x2_t gb = vld2q(in + 32); vst2q (out, rg); vst2q (out + 32, gb); }
/* **test3: ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test3(const uint8_t * in, uint8_t * out) { uint8x16x2_t rg = vld2q(in); uint8x16x2_t gb = vld2q(in - 32); vst2q (out, rg); vst2q (out - 32, gb); }
/* **test4: ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test4(const uint8_t * in, uint8_t * out) { uint8x16x2_t rg = vld2q(in); uint8x16x2_t gb = vld2q(in + 64); vst2q (out, rg); vst2q (out + 64, gb); }
/* **test5: ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test5(const uint8_t * in, uint8_t * out) { uint8x16x2_t rg = vld2q(in); uint8x16x2_t gb = vld2q(in + 42); vst2q (out, rg); vst2q (out + 42, gb); }
/* **test6: ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test6(const uint8_t * in, uint8_t * out, int width) { uint8x16x4_t rg = vld4q(in); uint8x16x4_t gb = vld4q(in + width); vst4q (out, rg); vst4q (out + width, gb); }
/* **test7: ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test7(const uint8_t * in, uint8_t * out) { uint8x16x4_t rg = vld4q(in); uint8x16x4_t gb = vld4q(in + 32); vst4q (out, rg); vst4q (out + 32, gb); }
/* **test8: ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test8(const uint8_t * in, uint8_t * out) { uint8x16x4_t rg = vld4q(in); uint8x16x4_t gb = vld4q(in + 64); vst4q (out, rg); vst4q (out + 64, gb); }
/* **test9: ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test9(const uint8_t * in, uint8_t * out) { uint8x16x4_t rg = vld4q(in); uint8x16x4_t gb = vld4q(in - 64); vst4q (out, rg); vst4q (out - 64, gb); }
/* **test10: ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... ** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] ** ... */ void test10(const uint8_t * in, uint8_t * out) { uint8x16x4_t rg = vld4q(in); uint8x16x4_t gb = vld4q(in + 42); vst4q (out, rg); vst4q (out + 42, gb); }
#ifdef __cplusplus } #endif
/* { dg-final { scan-assembler-not "__ARM_undef" } } */
|