aboutsummaryrefslogtreecommitdiff
path: root/target
diff options
context:
space:
mode:
author: Joseph Myers <joseph@codesourcery.com> 2020-06-25 23:58:31 +0000
committer: Paolo Bonzini <pbonzini@redhat.com> 2020-07-10 18:02:17 -0400
commit: 418b0f93d12a1589d5031405de857844f32e9ccc (patch)
tree: 2684d445b3b701c9cd347b9f9ca0eec01bea393f /target
parent: 3ddc0eca2229846bfecc3485648a6cb85a466dc7 (diff)
target/i386: fix IEEE SSE floating-point exception raising
The SSE instruction implementations all fail to raise the expected IEEE floating-point exceptions because they do nothing to convert the exception state from the softfloat machinery into the exception flags in MXCSR.

Fix this by adding such conversions. Unlike for x87, emulated SSE floating-point operations might be optimized using hardware floating point on the host, and so a different approach is taken that is compatible with such optimizations. The required invariant is that all exceptions set in env->sse_status (other than "denormal operand", for which the SSE semantics are different from those in the softfloat code) are ones that are set in the MXCSR; the emulated MXCSR is updated lazily when code reads MXCSR, while when code sets MXCSR, the exceptions in env->sse_status are set accordingly.

A few instructions do not raise all the exceptions that would be raised by the softfloat code, and those instructions are made to save and restore the softfloat exception state accordingly.

Nothing is done about "denormal operand"; setting that (only for the case when input denormals are *not* flushed to zero, the opposite of the logic in the softfloat code for such an exception) will require custom code for relevant instructions, or else architecture-specific conditionals in the softfloat code for when to set such an exception together with custom code for various SSE conversion and rounding instructions that do not set that exception.

Nothing is done about trapping exceptions (for which there is minimal and largely broken support in QEMU's emulation in the x87 case and no support at all in the SSE case).

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Message-Id: <alpine.DEB.2.21.2006252358000.3832@digraph.polyomino.org.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'target')
-rw-r--r-- target/i386/Makefile.objs | 1
-rw-r--r-- target/i386/cpu.h | 1
-rw-r--r-- target/i386/fpu_helper.c | 35
-rw-r--r-- target/i386/gdbstub.c | 1
-rw-r--r-- target/i386/helper.c | 1
-rw-r--r-- target/i386/helper.h | 1
-rw-r--r-- target/i386/ops_sse.h | 28
-rw-r--r-- target/i386/tcg-stub.c | 25
-rw-r--r-- target/i386/translate.c | 1
9 files changed, 82 insertions, 12 deletions
diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 48e0c28434..0b93143e27 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -3,6 +3,7 @@ obj-$(CONFIG_TCG) += translate.o
obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o
obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o
obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o
+obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
ifeq ($(CONFIG_SOFTMMU),y)
obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7d77efd9e4..06b2e3a5c6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2143,6 +2143,7 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env)
/* fpu_helper.c */
void update_fp_status(CPUX86State *env);
void update_mxcsr_status(CPUX86State *env);
+void update_mxcsr_from_sse_status(CPUX86State *env);
static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
{
diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index ec5b9db8b9..f5e6c4b88d 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -2539,6 +2539,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
+ update_mxcsr_from_sse_status(env);
cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
}
@@ -2968,6 +2969,14 @@ void update_mxcsr_status(CPUX86State *env)
}
set_float_rounding_mode(rnd_type, &env->sse_status);
+ /* Set exception flags. */
+ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
+ (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
+ (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
+ (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
+ (mxcsr & FPUS_PE ? float_flag_inexact : 0),
+ &env->sse_status);
+
/* set denormals are zero */
set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
@@ -2975,6 +2984,32 @@ void update_mxcsr_status(CPUX86State *env)
set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}
+void update_mxcsr_from_sse_status(CPUX86State *env)
+{
+ if (tcg_enabled()) {
+ uint8_t flags = get_float_exception_flags(&env->sse_status);
+ /*
+ * The MXCSR denormal flag has opposite semantics to
+ * float_flag_input_denormal (the softfloat code sets that flag
+ * only when flushing input denormals to zero, but SSE sets it
+ * only when not flushing them to zero), so is not converted
+ * here.
+ */
+ env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
+ (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
+ (flags & float_flag_overflow ? FPUS_OE : 0) |
+ (flags & float_flag_underflow ? FPUS_UE : 0) |
+ (flags & float_flag_inexact ? FPUS_PE : 0) |
+ (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
+ 0));
+ }
+}
+
+void helper_update_mxcsr(CPUX86State *env)
+{
+ update_mxcsr_from_sse_status(env);
+}
+
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
cpu_set_mxcsr(env, val);
diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c
index b98a99500a..9ae43bda0f 100644
--- a/target/i386/gdbstub.c
+++ b/target/i386/gdbstub.c
@@ -184,6 +184,7 @@ int x86_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
return gdb_get_reg32(mem_buf, 0); /* fop */
case IDX_MXCSR_REG:
+ update_mxcsr_from_sse_status(env);
return gdb_get_reg32(mem_buf, env->mxcsr);
case IDX_CTL_CR0_REG:
diff --git a/target/i386/helper.c b/target/i386/helper.c
index c3a6e4fabe..fa2a1dcdda 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -544,6 +544,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags)
for(i = 0; i < 8; i++) {
fptag |= ((!env->fptags[i]) << i);
}
+ update_mxcsr_from_sse_status(env);
qemu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
env->fpuc,
(env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
diff --git a/target/i386/helper.h b/target/i386/helper.h
index 8f9e1905c3..c2ae2f7e61 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -207,6 +207,7 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
/* MMX/SSE */
DEF_HELPER_2(ldmxcsr, void, env, i32)
+DEF_HELPER_1(update_mxcsr, void, env)
DEF_HELPER_1(enter_mmx, void, env)
DEF_HELPER_1(emms, void, env)
DEF_HELPER_3(movq, void, env, ptr, ptr)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 14f2b16abd..c7614f8b0b 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -843,6 +843,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s)
void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
d->ZMM_S(0) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(0), &env->sse_status),
&env->sse_status);
@@ -855,26 +856,33 @@ void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
d->ZMM_S(3) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(3), &env->sse_status),
&env->sse_status);
+ set_float_exception_flags(old_flags, &env->sse_status);
}
void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
d->ZMM_S(0) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(0), &env->sse_status),
&env->sse_status);
+ set_float_exception_flags(old_flags, &env->sse_status);
}
void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status);
d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status);
d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status);
+ set_float_exception_flags(old_flags, &env->sse_status);
}
void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
+ set_float_exception_flags(old_flags, &env->sse_status);
}
static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
@@ -1764,6 +1772,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode;
@@ -1789,19 +1798,18 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status);
d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status);
-#if 0 /* TODO */
- if (mode & (1 << 3)) {
+ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact,
&env->sse_status);
}
-#endif
env->sse_status.float_rounding_mode = prev_rounding_mode;
}
void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode;
@@ -1825,19 +1833,18 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status);
-#if 0 /* TODO */
- if (mode & (1 << 3)) {
+ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact,
&env->sse_status);
}
-#endif
env->sse_status.float_rounding_mode = prev_rounding_mode;
}
void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode;
@@ -1860,19 +1867,18 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status);
-#if 0 /* TODO */
- if (mode & (1 << 3)) {
+ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact,
&env->sse_status);
}
-#endif
env->sse_status.float_rounding_mode = prev_rounding_mode;
}
void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode)
{
+ uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode;
@@ -1895,13 +1901,11 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
-#if 0 /* TODO */
- if (mode & (1 << 3)) {
+ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact,
&env->sse_status);
}
-#endif
env->sse_status.float_rounding_mode = prev_rounding_mode;
}
diff --git a/target/i386/tcg-stub.c b/target/i386/tcg-stub.c
new file mode 100644
index 0000000000..b00e23d606
--- /dev/null
+++ b/target/i386/tcg-stub.c
@@ -0,0 +1,25 @@
+/*
+ * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+
+void update_mxcsr_from_sse_status(CPUX86State *env)
+{
+}
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 5e5dbb41b0..b3fea54411 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -8157,6 +8157,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
break;
}
+ gen_helper_update_mxcsr(cpu_env);
gen_lea_modrm(env, s, modrm);
tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
gen_op_st_v(s, MO_32, s->T0, s->A0);