/* File: fpu-387.h  */
/* FPU-related code for x86 and x86_64 processors. Copyright (C) 2005-2022 Free Software Foundation, Inc. Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
This file is part of the GNU Fortran 95 runtime library (libgfortran).
Libgfortran is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
Libgfortran is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */
/* cpuid.h is only needed when SSE availability has to be probed at
   run time, i.e. when the compiler is not already emitting SSE math.
   Each preprocessor directive must sit on a line of its own.  */
#ifndef __SSE_MATH__
#include "cpuid.h"
#endif

/* Report whether the SSE control/status register (MXCSR) may be used.

   When __SSE_MATH__ is defined the answer is known at compile time and
   1 is returned.  Otherwise CPUID leaf 1 is queried: 0 is returned if
   __get_cpuid reports failure, else the bit_SSE bit of EDX.  In that
   case the result is nonzero, but not necessarily 1, when SSE is
   present, so callers must treat it as a truth value only.  */

static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   The same six bit positions are used in this file for the exception
   masks of the x87 control word, for the exception flags of the x87
   status word, and (flags unshifted, masks shifted left by 7) for
   MXCSR.

   Every #define must be on its own line: a directive extends to the
   end of the line, so joined definitions would all become part of the
   first macro's replacement text.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormal operand.  */
#define _FPU_MASK_ZM  0x04	/* Division by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact).  */
#define _FPU_MASK_ALL 0x3f	/* All six mask bits.  */

/* All six exception flag bits.  */
#define _FPU_EX_ALL   0x3f
/* i387 rounding modes.  These are the unshifted two-bit values of the
   rounding-control field; the code in this file shifts them by 10 for
   the x87 control word and by 13 for MXCSR.

   Every #define must be on its own line: a directive extends to the
   end of the line, so joined definitions would all become part of the
   first macro's replacement text.  */
#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

/* Mask covering the (unshifted) rounding-control field.  */
#define _FPU_RC_MASK    0x3
/* MXCSR bit 15: when set, flush-to-zero mode is enabled.  */
#define MXCSR_FTZ 0x8000
/* This structure corresponds to the layout of the block
   written by FSTENV, followed by one extra word for MXCSR.

   It is used as the memory operand of fnstenv/fldenv throughout this
   file, so neither the order nor the width of the members may change.
   NOTE(review): gcc_struct presumably pins GCC's native struct layout
   (as opposed to ms_struct) -- confirm against the GCC attribute
   documentation.  */
struct fenv
{
  /* x87 control word; reloaded with fldcw in get_fpu_state.  */
  unsigned short int __control_word;
  unsigned short int __unused1;
  /* x87 status word; its low six bits are the exception flags that
     this file sets and clears.  */
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  /* Two bit-fields that together occupy 16 bits.  */
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  /* Not touched by fnstenv/fldenv: stored with stmxcsr and reloaded
     with ldmxcsr by get_fpu_state/set_fpu_state when SSE is
     available.  */
  unsigned int __mxcsr;
} __attribute__ ((gcc_struct));
/* Fail the build if the state buffer reserved by callers
   (GFC_FPE_STATE_BUFFER_SIZE bytes) cannot hold a struct fenv.  */
_Static_assert ((size_t) GFC_FPE_STATE_BUFFER_SIZE >= sizeof (struct fenv),
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
/* Evaluate X / Y at run time purely for its floating-point exception
   side effects; the quotient is discarded.  X must be a modifiable
   lvalue because it is used as a read-write asm operand.

   The first (empty) asm claims to modify X, so the compiler cannot
   treat X as a known constant and fold the division away.  The second,
   volatile, asm takes the quotient as an input operand, so the
   division has to be computed.  With SSE math the operands live in SSE
   registers ("x"); otherwise they live on the x87 stack ("t"/"f").

   Each directive sits on its own line and every backslash is the last
   character of its line, as line continuation requires.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); \
    __asm__ __volatile__ ("" : : "x" ((x) / (y))); \
  } while (0)
#else
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); \
    __asm__ __volatile__ ("" : : "f" ((x) / (y))); \
  } while (0)
#endif
/* Raise the supported floating-point exceptions from EXCEPTS. Other bits in EXCEPTS are ignored. Code originally borrowed from libatomic/config/x86/fenv.c. */
static void local_feraiseexcept (int excepts) { struct fenv temp;
if (excepts & _FPU_MASK_IM) { float f = 0.0f; __math_force_eval_div (f, f); } if (excepts & _FPU_MASK_DM) { __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); temp.__status_word |= _FPU_MASK_DM; __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); __asm__ __volatile__ ("fwait"); } if (excepts & _FPU_MASK_ZM) { float f = 1.0f, g = 0.0f; __math_force_eval_div (f, g); } if (excepts & _FPU_MASK_OM) { __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); temp.__status_word |= _FPU_MASK_OM; __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); __asm__ __volatile__ ("fwait"); } if (excepts & _FPU_MASK_UM) { __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); temp.__status_word |= _FPU_MASK_UM; __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp)); __asm__ __volatile__ ("fwait"); } if (excepts & _FPU_MASK_PM) { float f = 1.0f, g = 3.0f; __math_force_eval_div (f, g); } }
/* Enable trapping for the GFC_FPE_* exceptions in TRAP and disable it
   for those in NOTRAP, on the x87 unit and, when available, on SSE.
   An exception named in both ends up trapping, because the mask bits
   are set first and cleared second.  Pending exception flags are
   cleared on both units as part of the update.  */

void
set_fpu_trap_exceptions (int trap, int notrap)
{
  /* Pairs of { GFC_FPE_* flag, corresponding _FPU_MASK_* bit }.  */
  const int flag_map[][2] = {
    { GFC_FPE_INVALID,   _FPU_MASK_IM },
    { GFC_FPE_DENORMAL,  _FPU_MASK_DM },
    { GFC_FPE_ZERO,      _FPU_MASK_ZM },
    { GFC_FPE_OVERFLOW,  _FPU_MASK_OM },
    { GFC_FPE_UNDERFLOW, _FPU_MASK_UM },
    { GFC_FPE_INEXACT,   _FPU_MASK_PM }
  };
  int unmask_bits = 0;	/* Exceptions that shall trap.  */
  int mask_bits = 0;	/* Exceptions that shall not trap.  */
  unsigned short x87_cw;
  unsigned int i;

  for (i = 0; i < sizeof flag_map / sizeof flag_map[0]; i++)
    {
      if (trap & flag_map[i][0])
	unmask_bits |= flag_map[i][1];
      if (notrap & flag_map[i][0])
	mask_bits |= flag_map[i][1];
    }

  /* A set mask bit suppresses the trap, a clear one enables it.  */
  __asm__ __volatile__ ("fstcw\t%0" : "=m" (x87_cw));
  x87_cw |= mask_bits;
  x87_cw &= ~unmask_bits;

  /* Drop pending x87 exception flags before installing the new control
     word.  */
  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (x87_cw));

  if (has_sse())
    {
      unsigned int mxcsr;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));

      /* The SSE exception masks are shifted by 7 bits.  */
      mxcsr |= (mask_bits << 7);
      mxcsr &= ~(unmask_bits << 7);

      /* Clear stalled exception flags.  */
      mxcsr &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (mxcsr));
    }
}
/* Install the trap configuration held in options.fpe; no exception is
   explicitly requested to stop trapping.  */

void
set_fpu (void)
{
  const int no_notrap = 0;

  set_fpu_trap_exceptions (options.fpe, no_notrap);
}
/* Return the set of GFC_FPE_* exceptions that currently trap.  An
   exception is reported only if its mask bit is clear in the x87
   control word and, when SSE is available, also clear in MXCSR.  */

int
get_fpu_trap_exceptions (void)
{
  unsigned short x87_cw;
  int masked;
  int enabled;
  int result = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (x87_cw));
  masked = x87_cw;

  if (has_sse())
    {
      unsigned int mxcsr;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));

      /* The SSE exception masks are shifted by 7 bits.  */
      masked |= (mxcsr >> 7);
    }

  /* A clear mask bit means the exception traps.  */
  enabled = ~masked & _FPU_MASK_ALL;

  result |= (enabled & _FPU_MASK_IM) ? GFC_FPE_INVALID : 0;
  result |= (enabled & _FPU_MASK_DM) ? GFC_FPE_DENORMAL : 0;
  result |= (enabled & _FPU_MASK_ZM) ? GFC_FPE_ZERO : 0;
  result |= (enabled & _FPU_MASK_OM) ? GFC_FPE_OVERFLOW : 0;
  result |= (enabled & _FPU_MASK_UM) ? GFC_FPE_UNDERFLOW : 0;
  result |= (enabled & _FPU_MASK_PM) ? GFC_FPE_INEXACT : 0;

  return result;
}
/* Tell whether trapping on exception FLAG is supported.  The argument
   is ignored: the answer is 1 for every FLAG.  */

int
support_fpu_trap (int flag __attribute__((unused)))
{
  const int supported = 1;

  return supported;
}
/* Return the set of GFC_FPE_* exceptions whose flag is currently
   raised in the x87 status word or, when SSE is available, in
   MXCSR.  */

int
get_fpu_except_flags (void)
{
  unsigned short x87_sw;
  int raised;
  int result = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (x87_sw));
  raised = x87_sw;

  if (has_sse())
    {
      unsigned int mxcsr;

      /* The MXCSR exception flags sit at the same bit positions as
	 the x87 ones, so the two words can simply be OR-ed.  */
      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));
      raised |= mxcsr;
    }

  /* Discard everything but the six exception flags.  */
  raised &= _FPU_EX_ALL;

  result |= (raised & _FPU_MASK_IM) ? GFC_FPE_INVALID : 0;
  result |= (raised & _FPU_MASK_DM) ? GFC_FPE_DENORMAL : 0;
  result |= (raised & _FPU_MASK_ZM) ? GFC_FPE_ZERO : 0;
  result |= (raised & _FPU_MASK_OM) ? GFC_FPE_OVERFLOW : 0;
  result |= (raised & _FPU_MASK_UM) ? GFC_FPE_UNDERFLOW : 0;
  result |= (raised & _FPU_MASK_PM) ? GFC_FPE_INEXACT : 0;

  return result;
}
void set_fpu_except_flags (int set, int clear) { struct fenv temp; int exc_set = 0, exc_clr = 0;
/* Translate from GFC_PE_* values to _FPU_MASK_* values. */ if (set & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM; if (clear & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
if (set & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM; if (clear & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
if (set & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM; if (clear & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
if (set & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM; if (clear & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
if (set & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM; if (clear & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
if (set & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM; if (clear & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
/* Change the flags. This is tricky on 387 (unlike SSE), because we have FNSTSW but no FLDSW instruction. */ __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp)); temp.__status_word &= ~exc_clr; __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
/* Change the flags on SSE. */
if (has_sse()) { unsigned int cw_sse;
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse)); cw_sse &= ~exc_clr; __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse)); }
local_feraiseexcept (exc_set); }
/* Tell whether exception flag FLAG is supported.  The argument is
   ignored: the answer is 1 for every FLAG.  */

int
support_fpu_flag (int flag __attribute__((unused)))
{
  const int supported = 1;

  return supported;
}
/* Select rounding mode ROUND (one of GFC_FPE_TONEAREST, GFC_FPE_UPWARD,
   GFC_FPE_DOWNWARD, GFC_FPE_TOWARDZERO) on the x87 unit and, when
   available, on SSE.  Any other value leaves the FPU untouched.  */

void
set_fpu_rounding_mode (int round)
{
  int rc;
  unsigned short x87_cw;

  if (round == GFC_FPE_TONEAREST)
    rc = _FPU_RC_NEAREST;
  else if (round == GFC_FPE_UPWARD)
    rc = _FPU_RC_UP;
  else if (round == GFC_FPE_DOWNWARD)
    rc = _FPU_RC_DOWN;
  else if (round == GFC_FPE_TOWARDZERO)
    rc = _FPU_RC_ZERO;
  else
    return; /* Should be unreachable.  */

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (x87_cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  x87_cw &= ~(_FPU_RC_MASK << 10);
  x87_cw |= rc << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (x87_cw));

  if (has_sse())
    {
      unsigned int mxcsr;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));

      /* The SSE round control bits are shifted by 13 bits.  */
      mxcsr &= ~(_FPU_RC_MASK << 13);
      mxcsr |= rc << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (mxcsr));
    }
}
int get_fpu_rounding_mode (void) { int round_mode;
#ifdef __SSE_MATH__ unsigned int cw;
__asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
/* The SSE round control bits are shifted by 13 bits. */ round_mode = cw >> 13; #else unsigned short cw;
__asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
/* The x87 round control bits are shifted by 10 bits. */ round_mode = cw >> 10; #endif
round_mode &= _FPU_RC_MASK;
switch (round_mode) { case _FPU_RC_NEAREST: return GFC_FPE_TONEAREST; case _FPU_RC_UP: return GFC_FPE_UPWARD; case _FPU_RC_DOWN: return GFC_FPE_DOWNWARD; case _FPU_RC_ZERO: return GFC_FPE_TOWARDZERO; default: return 0; /* Should be unreachable. */ } }
/* Tell whether rounding mode MODE is supported.  The argument is
   ignored: the answer is 1 for every MODE.  */

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  const int supported = 1;

  return supported;
}
/* Save the floating-point environment into the buffer STATE, which is
   accessed as a struct fenv: the x87 environment first and, when SSE
   is available, MXCSR in the __mxcsr member.  */

void
get_fpu_state (void *state)
{
  struct fenv *const saved = (struct fenv *) state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*saved));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (saved->__control_word));

  if (has_sse())
    {
      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (saved->__mxcsr));
    }
}
/* Reload the floating-point environment from the buffer STATE, which
   is accessed as a struct fenv: the x87 environment first and, when
   SSE is available, MXCSR from the __mxcsr member.  */

void
set_fpu_state (void *state)
{
  struct fenv *const saved = (struct fenv *) state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*saved));

  if (has_sse())
    {
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (saved->__mxcsr));
    }
}
/* Tell whether the underflow mode can be controlled for KIND.
   Returns 1 only when SSE is available and KIND is 4 or 8, else 0.
   NOTE(review): 4 and 8 are presumably the Fortran real kinds handled
   by SSE arithmetic -- confirm against the callers.  */

int
support_fpu_underflow_control (int kind)
{
  return has_sse() && (kind == 4 || kind == 8);
}
/* Return 0 for abrupt underflow (flush to zero), 1 for gradual
   underflow.  Without SSE the flush-to-zero bit is never consulted
   and 1 is returned.  */

int
get_fpu_underflow_mode (void)
{
  unsigned int mxcsr;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));

  if (mxcsr & MXCSR_FTZ)
    return 0;

  return 1;
}
/* Select gradual underflow when GRADUAL is nonzero, abrupt underflow
   (flush to zero) otherwise, by updating the MXCSR_FTZ bit.  Does
   nothing when SSE is not available.  */

void
set_fpu_underflow_mode (int gradual)
{
  unsigned int mxcsr;

  if (has_sse())
    {
      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (mxcsr));

      mxcsr = gradual ? (mxcsr & ~MXCSR_FTZ) : (mxcsr | MXCSR_FTZ);

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (mxcsr));
    }
}
/* End of fpu-387.h.  */