/* slp-reduc-sad-2.c */
/* Disabling epilogues until we find a better way to deal with scans. */
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_usad_char } */
/* With AVX256 or more we do not pull off the trick eliding the epilogue. */
/* { dg-additional-options "-mprefer-avx128" { target { x86_64-*-* i?86-*-* } } } */
/* Sum of absolute differences over an 8x8 block of bytes.

   For each of 8 rows, the absolute differences of the first 8 bytes at
   pix1 and pix2 are added to a running int total.  The uint8_t operands
   are promoted to int before the subtraction, so each difference is
   computed in int and passed to __builtin_abs.

   pix1 advances by a fixed 16 bytes per row while only 8 bytes are read,
   and pix2 advances by the caller-supplied i_stride_pix2.
   NOTE(review): the 8-of-16 access on pix1 is presumably what the
   "access with gaps" dg-final scan in this file is about -- confirm.

   The eight accumulation statements are written out by hand rather than
   as an inner loop; keep that shape, since the dg-final scans check the
   vectorizer dump produced for this exact code.

   uint8_t is typedef'd locally; no system header is included.

   Returns the accumulated sum.  */
typedef unsigned char uint8_t; int x264_pixel_sad_8x8( uint8_t *pix1, uint8_t *pix2, int i_stride_pix2 ) { int i_sum = 0; for( int y = 0; y < 8; y++ ) { i_sum += __builtin_abs( pix1[0] - pix2[0] ); i_sum += __builtin_abs( pix1[1] - pix2[1] ); i_sum += __builtin_abs( pix1[2] - pix2[2] ); i_sum += __builtin_abs( pix1[3] - pix2[3] ); i_sum += __builtin_abs( pix1[4] - pix2[4] ); i_sum += __builtin_abs( pix1[5] - pix2[5] ); i_sum += __builtin_abs( pix1[6] - pix2[6] ); i_sum += __builtin_abs( pix1[7] - pix2[7] ); pix1 += 16; pix2 += i_stride_pix2; } return i_sum; }
/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump-not "access with gaps requires scalar epilogue loop" "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|