author     Peter Maydell <peter.maydell@linaro.org>    2024-11-05 21:27:18 +0000
committer  Peter Maydell <peter.maydell@linaro.org>    2024-11-05 21:27:18 +0000
commit     f15f7273ea55472d5904c53566c82369d81214c1 (patch)
tree       02d5ebe9234094f594b3a2f21345f28e2c9a7afd /target
parent     c003aeff91c29ad0c17511621035bee287adead5 (diff)
parent     374cdc8efe4a039510cca47e8399d54a1aeb4f2d (diff)
Merge tag 'pull-target-arm-20241105' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * Fix MMU indexes for AArch32 Secure PL1&0 in a less complex and buggy way
 * Fix SVE SDOT/UDOT/USDOT (4-way, indexed)
 * softfloat: set 2-operand NaN propagation rule at runtime
 * disas: Fix build against Capstone v6 (again)
 * hw/rtc/ds1338: Trace send and receive operations
 * hw/timer/imx_gpt: Convert DPRINTF to trace events
 * hw/watchdog/wdt_imx2: Remove redundant assignment
 * hw/sensor/tmp105: Convert printf() to trace event, add tracing for read/write access
 * hw/net/npcm_gmac: Change error log to trace event
 * target/arm: Enable FEAT_CMOW for -cpu max

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmcp/yoZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3ucMD/9pWk2ETLjdviPxlacs5IoM
# HvGn8Ll2BSMbeb4YdJc7oZ4YJchGpgHhocEwZuaU9HheWjSg+ZEbyhZgN4DdkT8J
# pYr+Rl0MgDNN219kYnO/yqnqlgHbtUpE7y57Li3ApoGNbWAVxsH0xoT45Lpi7DOd
# uvJfIy/xdaT3zu/4uBjj7c2VrD8wntEayLM8hpqlgeQZKRG3Wtlk/xrQFKOHPDPO
# MDbsGoc2FyogRQoo6WH+J6gkkR9PhqXe6Hbf6WIr1/uffZUZU4M8leSw2DgxrYHo
# Zf36AzttwO4GHyML/5SR7uvzfXl7OkGyjedLGCUa7INc3br2+GvLMltdLGGPM9cc
# ckMHOWd9ZQuSxcpbtPkSYRG0McRE1GLT+KV3BNOLnN9AJl3qv5Qa55iPrtpB08vX
# 3jN6H964w99+NoSB2tTHszpep+M7SRuw5QLsuk3tC/qnBMpzKRwZjGVUegNUtfi/
# Lg5ExF8B62K+xb5j5FmODbbXZmb5AD0rV2MGRIVHjiHdnf7J2FmWUJCe2sYFRnRm
# nzszhdOKw4PBhC2fb6Vb/DwCqdQy9vcITWpWBtcjkV5mAPhcBo/VNKNeKoc/tPNS
# H8FIFIJbtv5aIixqtKcUBUmrBCYy4EoiRMLkqfC09VW60wtWswAP4KBQxi1ogehV
# jJw8AgSLCl2MsVmyzgleZQ==
# =Woag
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 05 Nov 2024 11:19:06 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20241105' of https://git.linaro.org/people/pmaydell/qemu-arm: (31 commits)
  target/arm: Enable FEAT_CMOW for -cpu max
  hw/net/npcm_gmac: Change error log to trace event
  hw/sensor/tmp105: Convert printf() to trace event, add tracing for read/write access
  hw/watchdog/wdt_imx2: Remove redundant assignment
  hw/timer/imx_gpt: Convert DPRINTF to trace events
  hw/rtc/ds1338: Trace send and receive operations
  disas: Fix build against Capstone v6 (again)
  target/arm: Fix SVE SDOT/UDOT/USDOT (4-way, indexed)
  target/arm: Add new MMU indexes for AArch32 Secure PL1&0
  Revert "target/arm: Fix usage of MMU indexes when EL3 is AArch32"
  softfloat: Remove fallback rule from pickNaN()
  target/rx: Explicitly set 2-NaN propagation rule
  target/openrisc: Explicitly set 2-NaN propagation rule
  target/microblaze: Explicitly set 2-NaN propagation rule
  target/microblaze: Move setting of float rounding mode to reset
  target/alpha: Explicitly set 2-NaN propagation rule
  target/i386: Set 2-NaN propagation rule explicitly
  target/xtensa: Explicitly set 2-NaN propagation rule
  target/xtensa: Factor out calls to set_use_first_nan()
  target/sparc: Explicitly set 2-NaN propagation rule
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
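The common thread in the NaN-propagation patches below is that softfloat's pickNaN() no longer has a built-in fallback, so each target now states its 2-operand NaN propagation rule explicitly on every float_status it uses, typically at CPU reset. A minimal sketch of that pattern, using the softfloat API that appears throughout this diff (set_float_2nan_prop_rule() and the float_2nan_prop_* rules); the "foo" target, CPUFooState and foo_cpu_reset_hold() are hypothetical stand-ins, not code from this series:

    /* Sketch only: hypothetical target "foo" picking its NaN rule at reset. */
    #include "qemu/osdep.h"
    #include "cpu.h"
    #include "fpu/softfloat.h"

    static void foo_cpu_reset_hold(Object *obj, ResetType type)
    {
        CPUFooState *env = cpu_env(CPU(obj));

        /*
         * Tininess detection and the 2-NaN rule come from the architecture
         * manual; here: prefer an SNaN over a QNaN, then operand A over B.
         */
        set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
        set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
    }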
Diffstat (limited to 'target')
-rw-r--r--  target/alpha/cpu.c | 11
-rw-r--r--  target/arm/cpu-features.h | 5
-rw-r--r--  target/arm/cpu.c | 25
-rw-r--r--  target/arm/cpu.h | 49
-rw-r--r--  target/arm/helper.c | 73
-rw-r--r--  target/arm/internals.h | 41
-rw-r--r--  target/arm/ptw.c | 10
-rw-r--r--  target/arm/tcg/cpu64.c | 1
-rw-r--r--  target/arm/tcg/hflags.c | 4
-rw-r--r--  target/arm/tcg/op_helper.c | 14
-rw-r--r--  target/arm/tcg/translate-a64.c | 2
-rw-r--r--  target/arm/tcg/translate.c | 12
-rw-r--r--  target/arm/tcg/translate.h | 2
-rw-r--r--  target/arm/tcg/vec_helper.c | 9
-rw-r--r--  target/hppa/fpu_helper.c | 6
-rw-r--r--  target/i386/cpu.c | 4
-rw-r--r--  target/i386/cpu.h | 3
-rw-r--r--  target/i386/tcg/fpu_helper.c | 40
-rw-r--r--  target/loongarch/tcg/fpu_helper.c | 1
-rw-r--r--  target/m68k/cpu.c | 16
-rw-r--r--  target/m68k/fpu_helper.c | 1
-rw-r--r--  target/m68k/helper.c | 4
-rw-r--r--  target/microblaze/cpu.c | 10
-rw-r--r--  target/mips/cpu.c | 2
-rw-r--r--  target/mips/fpu_helper.h | 22
-rw-r--r--  target/mips/msa.c | 17
-rw-r--r--  target/openrisc/cpu.c | 6
-rw-r--r--  target/ppc/cpu_init.c | 8
-rw-r--r--  target/rx/cpu.c | 7
-rw-r--r--  target/s390x/cpu.c | 1
-rw-r--r--  target/sparc/cpu.c | 10
-rw-r--r--  target/sparc/fop_helper.c | 10
-rw-r--r--  target/xtensa/cpu.c | 2
-rw-r--r--  target/xtensa/cpu.h | 6
-rw-r--r--  target/xtensa/fpu_helper.c | 35
35 files changed, 345 insertions, 124 deletions
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 9db1dffc03..5d75c941f7 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -24,6 +24,7 @@
#include "qemu/qemu-print.h"
#include "cpu.h"
#include "exec/exec-all.h"
+#include "fpu/softfloat.h"
static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
@@ -187,7 +188,17 @@ static void alpha_cpu_initfn(Object *obj)
{
CPUAlphaState *env = cpu_env(CPU(obj));
+ /* TODO all this should be done in reset, not init */
+
env->lock_addr = -1;
+
+ /*
+ * TODO: this is incorrect. The Alpha Architecture Handbook version 4
+ * describes NaN propagation in section 4.7.10.4. We should prefer
+ * the operand in Fb (whether it is a QNaN or an SNaN), then the
+ * operand in Fa. That is float_2nan_prop_ba.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
#if defined(CONFIG_USER_ONLY)
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 04ce281826..e806f138b8 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -802,6 +802,11 @@ static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
}
+static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, CMOW) != 0;
+}
+
static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5b751439bd..6938161b95 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -168,6 +168,18 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
}
+/*
+ * Set the float_status behaviour to match the Arm defaults:
+ * * tininess-before-rounding
+ * * 2-input NaN propagation prefers SNaN over QNaN, and then
+ * operand A over operand B (see FPProcessNaNs() pseudocode)
+ */
+static void arm_set_default_fp_behaviours(float_status *s)
+{
+ set_float_detect_tininess(float_tininess_before_rounding, s);
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
+}
+
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
{
/* Reset a single ARMCPRegInfo register */
@@ -549,14 +561,11 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
set_default_nan_mode(1, &env->vfp.standard_fp_status);
set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.fp_status);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.standard_fp_status);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.fp_status_f16);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.standard_fp_status_f16);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status);
+ arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status_f16);
+ arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
+
#ifndef CONFIG_USER_ONLY
if (kvm_enabled()) {
kvm_arm_reset_vcpu(cpu);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8fc8b6398f..d86e641280 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1367,6 +1367,7 @@ void pmu_init(ARMCPU *cpu);
#define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */
#define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */
#define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
+#define SCTLR_CMOW (1ULL << 32) /* FEAT_CMOW */
#define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */
#define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */
#define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */
@@ -2805,38 +2806,38 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* The only use of stage 2 translations is either as part of an s1+2
* lookup or when loading the descriptors during a stage 1 page table walk,
* and in both those cases we don't use the TLB.
- * 4. we want to be able to use the TLB for accesses done as part of a
+ * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
+ * translation regimes, because they map reasonably well to each other
+ * and they can't both be active at the same time.
+ * 5. we want to be able to use the TLB for accesses done as part of a
* stage1 page table walk, rather than having to walk the stage2 page
* table over and over.
- * 5. we need separate EL1/EL2 mmu_idx for handling the Privileged Access
+ * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access
* Never (PAN) bit within PSTATE.
- * 6. we fold together most secure and non-secure regimes for A-profile,
+ * 7. we fold together most secure and non-secure regimes for A-profile,
* because there are no banked system registers for aarch64, so the
* process of switching between secure and non-secure is
* already heavyweight.
- * 7. we cannot fold together Stage 2 Secure and Stage 2 NonSecure,
+ * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure,
* because both are in use simultaneously for Secure EL2.
*
* This gives us the following list of cases:
*
- * EL0 EL1&0 stage 1+2 (or AArch32 PL0 PL1&0 stage 1+2)
- * EL1 EL1&0 stage 1+2 (or AArch32 PL1 PL1&0 stage 1+2)
- * EL1 EL1&0 stage 1+2 +PAN (or AArch32 PL1 PL1&0 stage 1+2 +PAN)
+ * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2)
+ * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2)
+ * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 PL1&0 stage 1+2 +PAN)
* EL0 EL2&0
* EL2 EL2&0
* EL2 EL2&0 +PAN
* EL2 (aka NS PL2)
- * EL3 (not used when EL3 is AArch32)
+ * EL3 (aka AArch32 S PL1 PL1&0)
+ * AArch32 S PL0 PL1&0 (we call this EL30_0)
+ * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN)
* Stage2 Secure
* Stage2 NonSecure
* plus one TLB per Physical address space: S, NS, Realm, Root
*
- * for a total of 14 different mmu_idx.
- *
- * Note that when EL3 is AArch32, the usage is potentially confusing
- * because the MMU indexes are named for their AArch64 use, so code
- * using the ARMMMUIdx_E10_1 might be at EL3, not EL1. This is because
- * Secure PL1 is always at EL3.
+ * for a total of 16 different mmu_idx.
*
* R profile CPUs have an MPU, but can use the same set of MMU indexes
* as A profile. They only need to distinguish EL0 and EL1 (and
@@ -2900,6 +2901,8 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A,
ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A,
ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A,
/*
* Used for second stage of an S12 page table walk, or for descriptor
@@ -2907,14 +2910,14 @@ typedef enum ARMMMUIdx {
* are in use simultaneously for SecureEL2: the security state for
* the S2 ptw is selected by the NS bit from the S1 ptw.
*/
- ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A,
- ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A,
/* TLBs with 1-1 mapping to the physical address spaces. */
- ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A,
/*
* These are not allocated TLBs and are used only for AT system
@@ -2953,6 +2956,8 @@ typedef enum ARMMMUIdxBit {
TO_CORE_BIT(E20_2),
TO_CORE_BIT(E20_2_PAN),
TO_CORE_BIT(E3),
+ TO_CORE_BIT(E30_0),
+ TO_CORE_BIT(E30_3_PAN),
TO_CORE_BIT(Stage2),
TO_CORE_BIT(Stage2_S),
@@ -3130,10 +3135,6 @@ FIELD(TBFLAG_A32, NS, 10, 1)
* This requires an SME trap from AArch32 mode when using NEON.
*/
FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
-/*
- * Indicates whether we are in the Secure PL1&0 translation regime
- */
-FIELD(TBFLAG_A32, S_PL1_0, 12, 1)
/*
* Bit usage when in AArch32 state, for M-profile only.
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0a731a38e8..f38eb054c0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -444,6 +444,9 @@ static int alle1_tlbmask(CPUARMState *env)
* Note that the 'ALL' scope must invalidate both stage 1 and
* stage 2 translations, whereas most other scopes only invalidate
* stage 1 translations.
+ *
+ * For AArch32 this is only used for TLBIALLNSNH and VTTBR
+ * writes, so only needs to apply to NS PL1&0, not S PL1&0.
*/
return (ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
@@ -3701,7 +3704,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
*/
format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
- if (arm_feature(env, ARM_FEATURE_EL2) && !arm_aa32_secure_pl1_0(env)) {
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
if (mmu_idx == ARMMMUIdx_E10_0 ||
mmu_idx == ARMMMUIdx_E10_1 ||
mmu_idx == ARMMMUIdx_E10_1_PAN) {
@@ -3775,11 +3778,17 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
case 0:
/* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */
switch (el) {
+ case 3:
+ if (ri->crm == 9 && arm_pan_enabled(env)) {
+ mmu_idx = ARMMMUIdx_E30_3_PAN;
+ } else {
+ mmu_idx = ARMMMUIdx_E3;
+ }
+ break;
case 2:
g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
/* fall through */
case 1:
- case 3:
if (ri->crm == 9 && arm_pan_enabled(env)) {
mmu_idx = ARMMMUIdx_Stage1_E1_PAN;
} else {
@@ -3794,7 +3803,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
/* stage 1 current state PL0: ATS1CUR, ATS1CUW */
switch (el) {
case 3:
- mmu_idx = ARMMMUIdx_E10_0;
+ mmu_idx = ARMMMUIdx_E30_0;
break;
case 2:
g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
@@ -4904,11 +4913,14 @@ static int vae1_tlbmask(CPUARMState *env)
uint64_t hcr = arm_hcr_el2_eff(env);
uint16_t mask;
+ assert(arm_feature(env, ARM_FEATURE_AARCH64));
+
if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
mask = ARMMMUIdxBit_E20_2 |
ARMMMUIdxBit_E20_2_PAN |
ARMMMUIdxBit_E20_0;
} else {
+ /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */
mask = ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
ARMMMUIdxBit_E10_0;
@@ -4947,6 +4959,8 @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr)
uint64_t hcr = arm_hcr_el2_eff(env);
ARMMMUIdx mmu_idx;
+ assert(arm_feature(env, ARM_FEATURE_AARCH64));
+
/* Only the regime of the mmu_idx below is significant. */
if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
mmu_idx = ARMMMUIdx_E20_0;
@@ -6215,6 +6229,11 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
if (cpu_isar_feature(aa64_nmi, cpu)) {
valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI;
}
+
+ /* FEAT_CMOW adds CMOW */
+ if (cpu_isar_feature(aa64_cmow, cpu)) {
+ valid_mask |= HCRX_CMOW;
+ }
/* Clear RES0 bits. */
env->cp15.hcrx_el2 = value & valid_mask;
@@ -11860,13 +11879,20 @@ void arm_cpu_do_interrupt(CPUState *cs)
uint64_t arm_sctlr(CPUARMState *env, int el)
{
- if (arm_aa32_secure_pl1_0(env)) {
- /* In Secure PL1&0 SCTLR_S is always controlling */
- el = 3;
- } else if (el == 0) {
- /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */
+ /* Only EL0 needs to be adjusted for EL1&0 or EL2&0 or EL3&0 */
+ if (el == 0) {
ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
- el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ el = 2;
+ break;
+ case ARMMMUIdx_E30_0:
+ el = 3;
+ break;
+ default:
+ el = 1;
+ break;
+ }
}
return env->cp15.sctlr_el[el];
}
@@ -12524,12 +12550,8 @@ int fp_exception_el(CPUARMState *env, int cur_el)
return 0;
}
-/*
- * Return the exception level we're running at if this is our mmu_idx.
- * s_pl1_0 should be true if this is the AArch32 Secure PL1&0 translation
- * regime.
- */
-int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0)
+/* Return the exception level we're running at if this is our mmu_idx */
+int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
{
if (mmu_idx & ARM_MMU_IDX_M) {
return mmu_idx & ARM_MMU_IDX_M_PRIV;
@@ -12538,15 +12560,17 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0)
switch (mmu_idx) {
case ARMMMUIdx_E10_0:
case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E30_0:
return 0;
case ARMMMUIdx_E10_1:
case ARMMMUIdx_E10_1_PAN:
- return s_pl1_0 ? 3 : 1;
+ return 1;
case ARMMMUIdx_E2:
case ARMMMUIdx_E20_2:
case ARMMMUIdx_E20_2_PAN:
return 2;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_3_PAN:
return 3;
default:
g_assert_not_reached();
@@ -12575,19 +12599,13 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
hcr = arm_hcr_el2_eff(env);
if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
idx = ARMMMUIdx_E20_0;
+ } else if (arm_is_secure_below_el3(env) &&
+ !arm_el_is_aa64(env, 3)) {
+ idx = ARMMMUIdx_E30_0;
} else {
idx = ARMMMUIdx_E10_0;
}
break;
- case 3:
- /*
- * AArch64 EL3 has its own translation regime; AArch32 EL3
- * uses the Secure PL1&0 translation regime.
- */
- if (arm_el_is_aa64(env, 3)) {
- return ARMMMUIdx_E3;
- }
- /* fall through */
case 1:
if (arm_pan_enabled(env)) {
idx = ARMMMUIdx_E10_1_PAN;
@@ -12607,6 +12625,11 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
idx = ARMMMUIdx_E2;
}
break;
+ case 3:
+ if (!arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) {
+ return ARMMMUIdx_E30_3_PAN;
+ }
+ return ARMMMUIdx_E3;
default:
g_assert_not_reached();
}
diff --git a/target/arm/internals.h b/target/arm/internals.h
index fd8f7c82aa..e37f459af3 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -276,20 +276,6 @@ FIELD(CNTHCTL, CNTPMASK, 19, 1)
#define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */
/**
- * arm_aa32_secure_pl1_0(): Return true if in Secure PL1&0 regime
- *
- * Return true if the CPU is in the Secure PL1&0 translation regime.
- * This requires that EL3 exists and is AArch32 and we are currently
- * Secure. If this is the case then the ARMMMUIdx_E10* apply and
- * mean we are in EL3, not EL1.
- */
-static inline bool arm_aa32_secure_pl1_0(CPUARMState *env)
-{
- return arm_feature(env, ARM_FEATURE_EL3) &&
- !arm_el_is_aa64(env, 3) && arm_is_secure(env);
-}
-
-/**
* raise_exception: Raise the specified exception.
* Raise a guest exception with the specified value, syndrome register
* and target exception level. This should be called from helper functions,
@@ -841,12 +827,7 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx)
return mmu_idx | ARM_MMU_IDX_A;
}
-/**
- * Return the exception level we're running at if our current MMU index
- * is @mmu_idx. @s_pl1_0 should be true if this is the AArch32
- * Secure PL1&0 translation regime.
- */
-int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0);
+int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx);
/* Return the MMU index for a v7M CPU in the specified security state */
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate);
@@ -890,7 +871,16 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu)
}
}
-/* Return true if this address translation regime has two ranges. */
+/*
+ * Return true if this address translation regime has two ranges.
+ * Note that this will not return the correct answer for AArch32
+ * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is
+ * never called from a context where EL3 can be AArch32. (The
+ * correct return value for ARMMMUIdx_E3 would be different for
+ * that case, so we can't just make the function return the
+ * correct value anyway; we would need an extra "bool e3_is_aarch32"
+ * argument which all the current callsites would pass as 'false'.)
+ */
static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
{
switch (mmu_idx) {
@@ -915,6 +905,7 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx)
case ARMMMUIdx_Stage1_E1_PAN:
case ARMMMUIdx_E10_1_PAN:
case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_E30_3_PAN:
return true;
default:
return false;
@@ -938,14 +929,15 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
case ARMMMUIdx_E2:
return 2;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
return 3;
case ARMMMUIdx_E10_0:
case ARMMMUIdx_Stage1_E0:
- case ARMMMUIdx_E10_1:
- case ARMMMUIdx_E10_1_PAN:
case ARMMMUIdx_Stage1_E1:
case ARMMMUIdx_Stage1_E1_PAN:
- return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3;
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
case ARMMMUIdx_MPrivNegPri:
case ARMMMUIdx_MUserNegPri:
case ARMMMUIdx_MPriv:
@@ -965,6 +957,7 @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
switch (mmu_idx) {
case ARMMMUIdx_E10_0:
case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E30_0:
case ARMMMUIdx_Stage1_E0:
case ARMMMUIdx_MUser:
case ARMMMUIdx_MSUser:
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index dd40268397..9849949508 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -280,6 +280,8 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx,
case ARMMMUIdx_E20_2_PAN:
case ARMMMUIdx_E2:
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
break;
case ARMMMUIdx_Phys_S:
@@ -3607,11 +3609,7 @@ bool get_phys_addr(CPUARMState *env, vaddr address,
case ARMMMUIdx_Stage1_E1:
case ARMMMUIdx_Stage1_E1_PAN:
case ARMMMUIdx_E2:
- if (arm_aa32_secure_pl1_0(env)) {
- ss = ARMSS_Secure;
- } else {
- ss = arm_security_space_below_el3(env);
- }
+ ss = arm_security_space_below_el3(env);
break;
case ARMMMUIdx_Stage2:
/*
@@ -3639,6 +3637,8 @@ bool get_phys_addr(CPUARMState *env, vaddr address,
ss = ARMSS_Secure;
break;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
if (arm_feature(env, ARM_FEATURE_AARCH64) &&
cpu_isar_feature(aa64_rme, env_archcpu(env))) {
ss = ARMSS_Root;
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 0168920828..2963d7510f 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1218,6 +1218,7 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
cpu->isar.id_aa64mmfr1 = t;
t = cpu->isar.id_aa64mmfr2;
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index bab7822ef6..f03977b4b0 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -198,10 +198,6 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
}
- if (arm_aa32_secure_pl1_0(env)) {
- DP_TBFLAG_A32(flags, S_PL1_0, 1);
- }
-
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
}
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index c083e5cfb8..1ecb465988 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -912,7 +912,19 @@ void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome)
{
/* See arm_sctlr(), but we also need the sctlr el. */
ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
- int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+ int target_el;
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ target_el = 2;
+ break;
+ case ARMMMUIdx_E30_0:
+ target_el = 3;
+ break;
+ default:
+ target_el = 1;
+ break;
+ }
/*
* The bit is not valid unless the target el is aa64, but since the
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index ec0b1ee252..b2851ea503 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -11690,7 +11690,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
- dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, false);
+ dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
dc->user = (dc->current_el == 0);
#endif
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index e2748ff2bb..9ee761fc64 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -228,6 +228,9 @@ static inline int get_a32_user_mem_index(DisasContext *s)
*/
switch (s->mmu_idx) {
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
+ return arm_to_core_mmu_idx(ARMMMUIdx_E30_0);
case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
case ARMMMUIdx_E10_0:
case ARMMMUIdx_E10_1:
@@ -7546,6 +7549,10 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
+ dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
+#if !defined(CONFIG_USER_ONLY)
+ dc->user = (dc->current_el == 0);
+#endif
dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
@@ -7576,12 +7583,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
}
dc->sme_trap_nonstreaming =
EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
- dc->s_pl1_0 = EX_TBFLAG_A32(tb_flags, S_PL1_0);
}
- dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, dc->s_pl1_0);
-#if !defined(CONFIG_USER_ONLY)
- dc->user = (dc->current_el == 0);
-#endif
dc->lse2 = false; /* applies only to aarch64 */
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 5a2e10d64d..20cd0e851c 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -165,8 +165,6 @@ typedef struct DisasContext {
uint8_t gm_blocksize;
/* True if the current insn_start has been updated. */
bool insn_start_updated;
- /* True if this is the AArch32 Secure PL1&0 translation regime */
- bool s_pl1_0;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */
int c15_cpar;
/* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 22ddb96881..e825d501a2 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -836,6 +836,13 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
{ \
intptr_t i = 0, opr_sz = simd_oprsz(desc); \
intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \
+ /* \
+ * Special case: opr_sz == 8 from AA64/AA32 advsimd means the \
+ * first iteration might not be a full 16 byte segment. But \
+ * for vector lengths beyond that this must be SVE and we know \
+ * opr_sz is a multiple of 16, so we need not clamp segend \
+ * to opr_sz_n when we advance it at the end of the loop. \
+ */ \
intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \
intptr_t index = simd_data(desc); \
TYPED *d = vd, *a = va; \
@@ -853,7 +860,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
n[i * 4 + 2] * m2 + \
n[i * 4 + 3] * m3); \
} while (++i < segend); \
- segend = i + 4; \
+ segend = i + (16 / sizeof(TYPED)); \
} while (i < opr_sz_n); \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
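For reference, the vec_helper.c hunk above is the queue's "Fix SVE SDOT/UDOT/USDOT (4-way, indexed)" change: the helper walks the vector one 16-byte segment at a time, and a segment holds 16 / sizeof(TYPED) output elements, so the old hard-coded advance of 4 was only correct for 4-byte accumulators. A standalone sketch of the arithmetic (plain C, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Output elements per 16-byte segment for each accumulator width. */
        printf("32-bit accumulators: %zu per segment\n", 16 / sizeof(int32_t)); /* 4 */
        printf("64-bit accumulators: %zu per segment\n", 16 / sizeof(int64_t)); /* 2 */
        /*
         * With the old 'segend = i + 4', the 64-bit case treated 32 bytes as
         * one segment, so the per-segment indexed operand was applied to the
         * wrong elements; '16 / sizeof(TYPED)' keeps the advance equal to the
         * segment size for every element width.
         */
        return 0;
    }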
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c
index deaed2b65d..0e44074ba8 100644
--- a/target/hppa/fpu_helper.c
+++ b/target/hppa/fpu_helper.c
@@ -49,6 +49,12 @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
d = FIELD_EX32(shadow, FPSR, D);
set_flush_to_zero(d, &env->fp_status);
set_flush_inputs_to_zero(d, &env->fp_status);
+
+ /*
+ * TODO: we only need to do this at CPU reset, but currently
+ * HPPA does not implement a CPU reset method at all...
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
}
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 3baa95481f..3d2874cf78 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7200,6 +7200,10 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
memset(env, 0, offsetof(CPUX86State, end_reset_fields));
+ if (tcg_enabled()) {
+ cpu_init_fp_statuses(env);
+ }
+
env->old_exception = -1;
/* init to reset state */
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 59959b8b7a..c24d81bf31 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2614,6 +2614,9 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env)
int get_pg_mode(CPUX86State *env);
/* fpu_helper.c */
+
+/* Set all non-runtime-variable float_status fields to x86 handling */
+void cpu_init_fp_statuses(CPUX86State *env);
void update_fp_status(CPUX86State *env);
void update_mxcsr_status(CPUX86State *env);
void update_mxcsr_from_sse_status(CPUX86State *env);
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index e1b850f3fc..53b49bb297 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -135,6 +135,46 @@ static void fpu_set_exception(CPUX86State *env, int mask)
}
}
+void cpu_init_fp_statuses(CPUX86State *env)
+{
+ /*
+ * Initialise the non-runtime-varying fields of the various
+ * float_status words to x86 behaviour. This must be called at
+ * CPU reset because the float_status words are in the
+ * "zeroed on reset" portion of the CPU state struct.
+ * Fields in float_status that vary under guest control are set
+ * via the codepath for setting that register, eg cpu_set_fpuc().
+ */
+ /*
+ * Use x87 NaN propagation rules:
+ * SNaN + QNaN => return the QNaN
+ * two SNaNs => return the one with the larger significand, silenced
+ * two QNaNs => return the one with the larger significand
+ * SNaN and a non-NaN => return the SNaN, silenced
+ * QNaN and a non-NaN => return the QNaN
+ *
+ * If we get down to comparing significands and they are the same,
+ * return the NaN with the positive sign bit (if any).
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
+ /*
+ * TODO: These are incorrect: the x86 Software Developer's Manual vol 1
+ * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the
+ * "larger significand" behaviour is only used for x87 FPU operations.
+ * For SSE the required behaviour is to always return the first NaN,
+ * which is float_2nan_prop_ab.
+ *
+ * mmx_status is used only for the AMD 3DNow! instructions, which
+ * are documented in the "3DNow! Technology Manual" as not supporting
+ * NaNs or infinities as inputs. The result of passing two NaNs is
+ * documented as "undefined", so we can do what we choose.
+ * (Strictly there is some behaviour we don't implement correctly
+ * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status);
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status);
+}
+
static inline uint8_t save_exception_flags(CPUX86State *env)
{
uint8_t old_flags = get_float_exception_flags(&env->fp_status);
diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c
index f6753c5875..21bc3b04a9 100644
--- a/target/loongarch/tcg/fpu_helper.c
+++ b/target/loongarch/tcg/fpu_helper.c
@@ -31,6 +31,7 @@ void restore_fp_status(CPULoongArchState *env)
set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
&env->fp_status);
set_flush_to_zero(0, &env->fp_status);
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status);
}
int ieee_ex_to_loongarch(int xcpt)
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 1d49f4cb23..5fe335558a 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -93,6 +93,22 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type)
env->fregs[i].d = nan;
}
cpu_m68k_set_fpcr(env, 0);
+ /*
+ * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL
+ * 3.4 FLOATING-POINT INSTRUCTION DETAILS
+ * If either operand, but not both operands, of an operation is a
+ * nonsignaling NaN, then that NaN is returned as the result. If both
+ * operands are nonsignaling NaNs, then the destination operand
+ * nonsignaling NaN is returned as the result.
+ * If either operand to an operation is a signaling NaN (SNaN), then the
+ * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit
+ * is set in the FPCR ENABLE byte, then the exception is taken and the
+ * destination is not modified. If the SNaN exception enable bit is not
+ * set, setting the SNaN bit in the operand to a one converts the SNaN to
+ * a nonsignaling NaN. The operation then continues as described in the
+ * preceding paragraph for nonsignaling NaNs.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
env->fpsr = 0;
/* TODO: We should set PC from the interrupt vector. */
diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c
index 8314791f50..a605162b71 100644
--- a/target/m68k/fpu_helper.c
+++ b/target/m68k/fpu_helper.c
@@ -620,6 +620,7 @@ void HELPER(frem)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1)
int sign;
/* Calculate quotient directly using round to nearest mode */
+ set_float_2nan_prop_rule(float_2nan_prop_ab, &fp_status);
set_float_rounding_mode(float_round_nearest_even, &fp_status);
set_floatx80_rounding_precision(
get_floatx80_rounding_precision(&env->fp_status), &fp_status);
diff --git a/target/m68k/helper.c b/target/m68k/helper.c
index 9d3db8419d..9bfc6ae97c 100644
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -36,7 +36,7 @@ static int cf_fpu_gdb_get_reg(CPUState *cs, GByteArray *mem_buf, int n)
CPUM68KState *env = &cpu->env;
if (n < 8) {
- float_status s;
+ float_status s = {};
return gdb_get_reg64(mem_buf, floatx80_to_float64(env->fregs[n].d, &s));
}
switch (n) {
@@ -56,7 +56,7 @@ static int cf_fpu_gdb_set_reg(CPUState *cs, uint8_t *mem_buf, int n)
CPUM68KState *env = &cpu->env;
if (n < 8) {
- float_status s;
+ float_status s = {};
env->fregs[n].d = float64_to_floatx80(ldq_be_p(mem_buf), &s);
return 8;
}
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 135947ee80..14286deead 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -201,6 +201,13 @@ static void mb_cpu_reset_hold(Object *obj, ResetType type)
env->pc = cpu->cfg.base_vectors;
+ set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+ /*
+ * TODO: this is probably not the correct NaN propagation rule for
+ * this architecture.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
+
#if defined(CONFIG_USER_ONLY)
/* start in user mode with interrupts enabled. */
mb_cpu_write_msr(env, MSR_EE | MSR_IE | MSR_VM | MSR_UM);
@@ -311,15 +318,12 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
static void mb_cpu_initfn(Object *obj)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(obj);
- CPUMBState *env = &cpu->env;
gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect,
mb_cpu_gdb_write_stack_protect,
gdb_find_static_feature("microblaze-stack-protect.xml"),
0);
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
-
#ifndef CONFIG_USER_ONLY
/* Inbound IRQ and FIR lines */
qdev_init_gpio_in(DEVICE(cpu), microblaze_cpu_set_irq, 2);
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 9724e71a5e..d0a43b6d5c 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -407,9 +407,9 @@ static void mips_cpu_reset_hold(Object *obj, ResetType type)
}
msa_reset(env);
+ fp_reset(env);
compute_hflags(env);
- restore_fp_status(env);
restore_pamask(env);
cs->exception_index = EXCP_NONE;
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
index ad1116e8c1..7c3c7897b4 100644
--- a/target/mips/fpu_helper.h
+++ b/target/mips/fpu_helper.h
@@ -44,6 +44,28 @@ static inline void restore_fp_status(CPUMIPSState *env)
restore_snan_bit_mode(env);
}
+static inline void fp_reset(CPUMIPSState *env)
+{
+ restore_fp_status(env);
+
+ /*
+ * According to MIPS specifications, if one of the two operands is
+ * a sNaN, a new qNaN has to be generated. This is done in
+ * floatXX_silence_nan(). For qNaN inputs the specifications
+ * says: "When possible, this QNaN result is one of the operand QNaN
+ * values." In practice it seems that most implementations choose
+ * the first operand if both operands are qNaN. In short this gives
+ * the following rules:
+ * 1. A if it is signaling
+ * 2. B if it is signaling
+ * 3. A (quiet)
+ * 4. B (quiet)
+ * A signaling NaN is always silenced before returning it.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab,
+ &env->active_fpu.fp_status);
+}
+
/* MSA */
enum CPUMIPSMSADataFormat {
diff --git a/target/mips/msa.c b/target/mips/msa.c
index 61f1a9a593..9dffc428f5 100644
--- a/target/mips/msa.c
+++ b/target/mips/msa.c
@@ -49,6 +49,23 @@ void msa_reset(CPUMIPSState *env)
set_float_detect_tininess(float_tininess_after_rounding,
&env->active_tc.msa_fp_status);
+ /*
+ * According to MIPS specifications, if one of the two operands is
+ * a sNaN, a new qNaN has to be generated. This is done in
+ * floatXX_silence_nan(). For qNaN inputs the specifications
+ * says: "When possible, this QNaN result is one of the operand QNaN
+ * values." In practice it seems that most implementations choose
+ * the first operand if both operands are qNaN. In short this gives
+ * the following rules:
+ * 1. A if it is signaling
+ * 2. B if it is signaling
+ * 3. A (quiet)
+ * 4. B (quiet)
+ * A signaling NaN is always silenced before returning it.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab,
+ &env->active_tc.msa_fp_status);
+
/* clear float_status exception flags */
set_float_exception_flags(0, &env->active_tc.msa_fp_status);
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 6ec54ad7a6..b96561d1f2 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -105,6 +105,12 @@ static void openrisc_cpu_reset_hold(Object *obj, ResetType type)
set_float_detect_tininess(float_tininess_before_rounding,
&cpu->env.fp_status);
+ /*
+ * TODO: this is probably not the correct NaN propagation rule for
+ * this architecture.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &cpu->env.fp_status);
+
#ifndef CONFIG_USER_ONLY
cpu->env.picmr = 0x00000000;
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 6f352550db..efcb80d1c2 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7262,6 +7262,14 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
/* tininess for underflow is detected before rounding */
set_float_detect_tininess(float_tininess_before_rounding,
&env->fp_status);
+ /*
+ * PowerPC propagation rules:
+ * 1. A if it is sNaN or qNaN
+ * 2. B if it is sNaN or qNaN
+ * A signaling NaN is always silenced before returning it.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status);
+ set_float_2nan_prop_rule(float_2nan_prop_ab, &env->vec_status);
for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) {
ppc_spr_t *spr = &env->spr_cb[i];
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 36d2a6f189..65a74ce720 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -93,6 +93,13 @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
env->fpsw = 0;
set_flush_to_zero(1, &env->fp_status);
set_flush_inputs_to_zero(1, &env->fp_status);
+ /*
+ * TODO: this is not the correct NaN propagation rule for this
+ * architecture. The "RX Family User's Manual: Software" table 1.6
+ * defines the propagation rules as "prefer SNaN over QNaN;
+ * then prefer dest over source", which is float_2nan_prop_s_ab.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
}
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 4e41a3dff5..514c70f301 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -205,6 +205,7 @@ static void s390_cpu_reset_hold(Object *obj, ResetType type)
/* tininess for underflow is detected before rounding */
set_float_detect_tininess(float_tininess_before_rounding,
&env->fpu_status);
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fpu_status);
/* fall through */
case RESET_TYPE_S390_CPU_NORMAL:
env->psw.mask &= ~PSW_MASK_RI;
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 54cb269e0a..dd7af86de7 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -26,6 +26,7 @@
#include "hw/qdev-properties.h"
#include "qapi/visitor.h"
#include "tcg/tcg.h"
+#include "fpu/softfloat.h"
//#define DEBUG_FEATURES
@@ -76,6 +77,7 @@ static void sparc_cpu_reset_hold(Object *obj, ResetType type)
env->npc = env->pc + 4;
#endif
env->cache_control = 0;
+ cpu_put_fsr(env, 0);
}
#ifndef CONFIG_USER_ONLY
@@ -805,7 +807,13 @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp)
env->version |= env->def.maxtl << 8;
env->version |= env->def.nwindows - 1;
#endif
- cpu_put_fsr(env, 0);
+
+ /*
+ * Prefer SNaN over QNaN, order B then A. It's OK to do this in realize
+ * rather than reset, because fp_status is after 'end_reset_fields' in
+ * the CPU state struct so it won't get zeroed on reset.
+ */
+ set_float_2nan_prop_rule(float_2nan_prop_s_ba, &env->fp_status);
cpu_exec_realizefn(cs, &local_err);
if (local_err != NULL) {
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index b6692382b3..6f9ccc008a 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -497,7 +497,10 @@ uint32_t helper_flcmps(float32 src1, float32 src2)
* Perform the comparison with a dummy fp environment.
*/
float_status discard = { };
- FloatRelation r = float32_compare_quiet(src1, src2, &discard);
+ FloatRelation r;
+
+ set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard);
+ r = float32_compare_quiet(src1, src2, &discard);
switch (r) {
case float_relation_equal:
@@ -518,7 +521,10 @@ uint32_t helper_flcmps(float32 src1, float32 src2)
uint32_t helper_flcmpd(float64 src1, float64 src2)
{
float_status discard = { };
- FloatRelation r = float64_compare_quiet(src1, src2, &discard);
+ FloatRelation r;
+
+ set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard);
+ r = float64_compare_quiet(src1, src2, &discard);
switch (r) {
case float_relation_equal:
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index a08c7a0b1f..6f9039abae 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -134,7 +134,7 @@ static void xtensa_cpu_reset_hold(Object *obj, ResetType type)
cs->halted = env->runstall;
#endif
set_no_signaling_nans(!dfpu, &env->fp_status);
- set_use_first_nan(!dfpu, &env->fp_status);
+ xtensa_use_first_nan(env, !dfpu);
}
static ObjectClass *xtensa_cpu_class_by_name(const char *cpu_model)
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 9f2341d856..77e48eef19 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -802,4 +802,10 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc,
XtensaCPU *xtensa_cpu_create_with_clock(const char *cpu_type,
Clock *cpu_refclk);
+/*
+ * Set the NaN propagation rule for future FPU operations:
+ * use_first is true to pick the first NaN as the result if both
+ * inputs are NaNs, false to pick the second.
+ */
+void xtensa_use_first_nan(CPUXtensaState *env, bool use_first);
#endif
diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c
index 381e83ded8..f2d212d05d 100644
--- a/target/xtensa/fpu_helper.c
+++ b/target/xtensa/fpu_helper.c
@@ -57,6 +57,13 @@ static const struct {
{ XTENSA_FP_V, float_flag_invalid, },
};
+void xtensa_use_first_nan(CPUXtensaState *env, bool use_first)
+{
+ set_use_first_nan(use_first, &env->fp_status);
+ set_float_2nan_prop_rule(use_first ? float_2nan_prop_ab : float_2nan_prop_ba,
+ &env->fp_status);
+}
+
void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v)
{
static const int rounding_mode[] = {
@@ -171,87 +178,87 @@ float32 HELPER(fpu2k_msub_s)(CPUXtensaState *env,
float64 HELPER(add_d)(CPUXtensaState *env, float64 a, float64 b)
{
- set_use_first_nan(true, &env->fp_status);
+ xtensa_use_first_nan(env, true);
return float64_add(a, b, &env->fp_status);
}
float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_add(a, b, &env->fp_status);
}
float64 HELPER(sub_d)(CPUXtensaState *env, float64 a, float64 b)
{
- set_use_first_nan(true, &env->fp_status);
+ xtensa_use_first_nan(env, true);
return float64_sub(a, b, &env->fp_status);
}
float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_sub(a, b, &env->fp_status);
}
float64 HELPER(mul_d)(CPUXtensaState *env, float64 a, float64 b)
{
- set_use_first_nan(true, &env->fp_status);
+ xtensa_use_first_nan(env, true);
return float64_mul(a, b, &env->fp_status);
}
float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_mul(a, b, &env->fp_status);
}
float64 HELPER(madd_d)(CPUXtensaState *env, float64 a, float64 b, float64 c)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float64_muladd(b, c, a, 0, &env->fp_status);
}
float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_muladd(b, c, a, 0, &env->fp_status);
}
float64 HELPER(msub_d)(CPUXtensaState *env, float64 a, float64 b, float64 c)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float64_muladd(b, c, a, float_muladd_negate_product,
&env->fp_status);
}
float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_muladd(b, c, a, float_muladd_negate_product,
&env->fp_status);
}
float64 HELPER(mkdadj_d)(CPUXtensaState *env, float64 a, float64 b)
{
- set_use_first_nan(true, &env->fp_status);
+ xtensa_use_first_nan(env, true);
return float64_div(b, a, &env->fp_status);
}
float32 HELPER(mkdadj_s)(CPUXtensaState *env, float32 a, float32 b)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_div(b, a, &env->fp_status);
}
float64 HELPER(mksadj_d)(CPUXtensaState *env, float64 v)
{
- set_use_first_nan(true, &env->fp_status);
+ xtensa_use_first_nan(env, true);
return float64_sqrt(v, &env->fp_status);
}
float32 HELPER(mksadj_s)(CPUXtensaState *env, float32 v)
{
- set_use_first_nan(env->config->use_first_nan, &env->fp_status);
+ xtensa_use_first_nan(env, env->config->use_first_nan);
return float32_sqrt(v, &env->fp_status);
}