Viewing file: recip-test2.h (9.94 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
/* * Included file to common source float/double checking * The following macros should be defined: * TYPE -- floating point type * NAME -- convert a name to include the type * UNS_TYPE -- type to hold TYPE as an unsigned number * EXP_SIZE -- size in bits of the exponent * MAN_SIZE -- size in bits of the mantissa * UNS_ABS -- absolute value for UNS_TYPE * FABS -- absolute value function for TYPE * FMAX -- maximum function for TYPE * FMIN -- minimum function for TYPE * SQRT -- square root function for TYPE * RMIN -- minimum random number to generate * RMAX -- maximum random number to generate * ASMDIV -- assembler instruction to do divide * ASMSQRT -- assembler instruction to do square root * BDIV -- # of bits of inaccuracy to allow for division * BRSQRT -- # of bits of inaccuracy to allow for 1/sqrt * INIT_DIV -- Initial values to test 1/x against * INIT_RSQRT -- Initial values to test 1/sqrt(x) against */
typedef union { UNS_TYPE i; TYPE x; } NAME (union);
/* * Input/output arrays. */
static NAME (union) NAME (div_input) [] __attribute__((__aligned__(32))) = INIT_DIV; static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT;
#define DIV_SIZE (sizeof (NAME (div_input)) / sizeof (TYPE)) #define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE))
static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32))); static TYPE NAME (div_output) [DIV_SIZE] __attribute__((__aligned__(32)));
static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32))); static TYPE NAME (rsqrt_output) [RSQRT_SIZE] __attribute__((__aligned__(32)));
/* * Crack a floating point number into sign bit, exponent, and mantissa. */
static void NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa) { NAME (union) u; UNS_TYPE bits;
u.x = number; bits = u.i;
*p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1); *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1)); *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1); return; }
/* * Prevent optimizer from eliminating + 0.0 to remove -0.0. */
volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0);
/* * Return negative if two numbers are significanly different or return the * number of bits that are different in the mantissa. */
static int NAME (math_diff) (TYPE a, TYPE b, int bits) { TYPE zero = NAME (math_diff_0); unsigned int sign_a, sign_b; unsigned int exponent_a, exponent_b; UNS_TYPE mantissa_a, mantissa_b, diff; int i;
/* eliminate signed zero. */ a += zero; b += zero;
/* special case Nan. */ if (__builtin_isnan (a)) return (__builtin_isnan (b) ? 0 : -1);
if (a == b) return 0;
/* special case infinity. */ if (__builtin_isinf (a)) return (__builtin_isinf (b) ? 0 : -1);
/* punt on denormal numbers. */ if (!__builtin_isnormal (a) || !__builtin_isnormal (b)) return -1;
NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a); NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b);
/* If the sign is different, there is no hope. */ if (sign_a != sign_b) return -1;
/* If the exponent is off by 1, see if the values straddle the power of two, and adjust things to do the mantassa check if we can. */ if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1))) { TYPE big = FMAX (a, b); TYPE small = FMIN (a, b); TYPE diff = FABS (a - b); unsigned int sign_big, sign_small, sign_test; unsigned int exponent_big, exponent_small, exponent_test; UNS_TYPE mantissa_big, mantissa_small, mantissa_test;
NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big); NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small);
NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test); if ((sign_test == sign_small) && (exponent_test == exponent_small)) { mantissa_a = mantissa_small; mantissa_b = mantissa_test; }
else { NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test); if ((sign_test == sign_big) && (exponent_test == exponent_big)) { mantissa_a = mantissa_big; mantissa_b = mantissa_test; }
else return -1; } }
else if (exponent_a != exponent_b) return -1;
diff = UNS_ABS (mantissa_a - mantissa_b); for (i = MAN_SIZE; i > 0; i--) { if ((diff & ((UNS_TYPE)1) << (i-1)) != 0) return i; }
return -1; }
/* * Turn off inlining to make code inspection easier. */
static void NAME (asm_div) (void) __attribute__((__noinline__)); static void NAME (vector_div) (void) __attribute__((__noinline__)); static void NAME (scalar_div) (void) __attribute__((__noinline__)); static void NAME (asm_rsqrt) (void) __attribute__((__noinline__)); static void NAME (vector_rsqrt) (void) __attribute__((__noinline__)); static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__)); static void NAME (check_div) (const char *) __attribute__((__noinline__)); static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__)); static void NAME (run) (void) __attribute__((__noinline__));
/* * Division function that might be vectorized. */
static void NAME (vector_div) (void) { size_t i;
for (i = 0; i < DIV_SIZE; i++) NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x; }
/* * Division function that is not vectorized. */
static void NAME (scalar_div) (void) { size_t i;
for (i = 0; i < DIV_SIZE; i++) { TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x; TYPE y; __asm__ ("" : "=d" (y) : "0" (x)); NAME (div_output)[i] = y; } }
/* * Generate the division instruction via asm. */
static void NAME (asm_div) (void) { size_t i;
for (i = 0; i < DIV_SIZE; i++) { TYPE x; __asm__ (ASMDIV " %0,%1,%2" : "=d" (x) : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x)); NAME (div_expected)[i] = x; } }
/* * Reciprocal square root function that might be vectorized. */
static void NAME (vector_rsqrt) (void) { size_t i;
for (i = 0; i < RSQRT_SIZE; i++) NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); }
/* * Reciprocal square root function that is not vectorized. */
static void NAME (scalar_rsqrt) (void) { size_t i;
for (i = 0; i < RSQRT_SIZE; i++) { TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); TYPE y; __asm__ ("" : "=d" (y) : "0" (x)); NAME (rsqrt_output)[i] = y; } }
/* * Generate the 1/sqrt instructions via asm. */
static void NAME (asm_rsqrt) (void) { size_t i;
for (i = 0; i < RSQRT_SIZE; i++) { TYPE x; TYPE y; __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x)); __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x)); NAME (rsqrt_expected)[i] = y; } }
/* * Functions to abort or report errors. */
static int NAME (error_count) = 0;
#ifdef VERBOSE static int NAME (max_bits_div) = 0; static int NAME (max_bits_rsqrt) = 0; #endif
/* * Compare the expected value with the value we got. */
static void NAME (check_div) (const char *test) { size_t i; int b;
for (i = 0; i < DIV_SIZE; i++) { TYPE exp = NAME (div_expected)[i]; TYPE out = NAME (div_output)[i]; b = NAME (math_diff) (exp, out, BDIV);
#ifdef VERBOSE if (b != 0) { NAME (union) u_in = NAME (div_input)[i]; NAME (union) u_exp; NAME (union) u_out; char explanation[64]; const char *p_exp;
if (b < 0) p_exp = "failed"; else { p_exp = explanation; sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); }
u_exp.x = exp; u_out.x = out; printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", TNAME (TYPE), test, p_exp, (double) u_in.x, (unsigned long long) u_in.i, (double) exp, (unsigned long long) u_exp.i, (double) out, (unsigned long long) u_out.i); } #endif
if (b < 0 || b > BDIV) NAME (error_count)++;
#ifdef VERBOSE if (b > NAME (max_bits_div)) NAME (max_bits_div) = b; #endif } }
static void NAME (check_rsqrt) (const char *test) { size_t i; int b;
for (i = 0; i < RSQRT_SIZE; i++) { TYPE exp = NAME (rsqrt_expected)[i]; TYPE out = NAME (rsqrt_output)[i]; b = NAME (math_diff) (exp, out, BRSQRT);
#ifdef VERBOSE if (b != 0) { NAME (union) u_in = NAME (rsqrt_input)[i]; NAME (union) u_exp; NAME (union) u_out; char explanation[64]; const char *p_exp;
if (b < 0) p_exp = "failed"; else { p_exp = explanation; sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); }
u_exp.x = exp; u_out.x = out; printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", TNAME (TYPE), test, p_exp, (double) u_in.x, (unsigned long long) u_in.i, (double) exp, (unsigned long long) u_exp.i, (double) out, (unsigned long long) u_out.i); } #endif
if (b < 0 || b > BRSQRT) NAME (error_count)++;
#ifdef VERBOSE if (b > NAME (max_bits_rsqrt)) NAME (max_bits_rsqrt) = b; #endif } }
/* * Now do everything. */
static void NAME (run) (void) { #ifdef VERBOSE printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n", TNAME (TYPE), (long)DIV_SIZE, (long)RSQRT_SIZE, BDIV, (BDIV == 1) ? "" : "s", BRSQRT, (BRSQRT == 1) ? "" : "s"); #endif
NAME (asm_div) ();
NAME (scalar_div) (); NAME (check_div) ("scalar");
NAME (vector_div) (); NAME (check_div) ("vector");
NAME (asm_rsqrt) ();
NAME (scalar_rsqrt) (); NAME (check_rsqrt) ("scalar");
NAME (vector_rsqrt) (); NAME (check_rsqrt) ("vector");
#ifdef VERBOSE printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n", TNAME (TYPE), NAME (error_count), NAME (max_bits_div), NAME (max_bits_rsqrt)); #endif }
|