| author | Richard Henderson <richard.henderson@linaro.org> | 2023-06-06 12:11:34 -0700 |
|---|---|---|
| committer | Richard Henderson <richard.henderson@linaro.org> | 2023-06-06 12:11:34 -0700 |
| commit | f5e6786de4815751b0a3d2235c760361f228ea48 (patch) | |
| tree | 544d0ff2d361be94e83bc620ea0e57b2886487ac | /target |
| parent | c0dde5fc5ccce56b69095bc29af72987efd65d1e (diff) | |
| parent | f9ac778898cb28307e0f91421aba34d43c34b679 (diff) | |
Merge tag 'pull-target-arm-20230606' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* Support gdbstub (guest debug) in HVF
* xlnx-versal: Support CANFD controller
* bpim2u: New board model: Banana Pi BPI-M2 Ultra
* Emulate FEAT_LSE2
* allow DC CVA[D]P in user mode emulation
* trap DCC access in user mode emulation
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmR/AKUZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3jzIEACNepQGY44yPhrEG+wD4WAB
# fH670KI33HcsFd2rGsC369gcssQbRIW/29reOzNhRMuol+kHI6OFaONpuKSdO0Rz
# TLVIsnT2Uq8KwbYfLtDQt5knj027amPy75d4re8wIK1eZB4dOIHysqAvQrJYeync
# 9obKku8xXGLwZh/mYHoVgHcZU0cPJO9nri39n1tV3JUBsgmqEURjzbZrMcF+yMX7
# bUzOYQvC1Iedmo+aWfx43u82AlNQFz1lsqmnQj7Z5rvv0HT+BRF5WzVMP0qRh5+Z
# njkqmBH9xb9kkgeHmeMvHpWox+J+obeSmVg/4gDNlJpThmpuU0Vr7EXUN3MBQlV9
# lhyy6zrTwC/BToiQqdT2dnpao9FzXy5exfnqi/py5IuqfjAzSO+p61LlPPZ4cJri
# pCK4yq2gzQXYfrlZkUJipvRMH8Xa4IdQx+w7lXrQoJdduF4/+6aJW/GAWSu0e7eC
# zgBwaJjI7ENce8ixJnuEFUxUnaBo8dl72a0PGA1UU8PL+cJNOIpyhPk4goWQprdn
# iFF4ZnjhBRZ2gk/4HGD9u5Vo2lNqP93YS5QhkGkF+HJsBmcOZgidIUpfHhPQvvHO
# Np196T2cAETCWGV1xG4CaTpxN2ndRReq3C0/mzfhIbwhXEACtvAiSlO4KB8t6pJj
# MzinCABXHcovJbGbxZ9j6w==
# =8SdN
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 06 Jun 2023 02:47:17 AM PDT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
* tag 'pull-target-arm-20230606' of https://git.linaro.org/people/pmaydell/qemu-arm: (42 commits)
target/arm: trap DCC access in user mode emulation
tests/tcg/aarch64: add DC CVA[D]P tests
target/arm: allow DC CVA[D]P in user mode emulation
target/arm: Enable FEAT_LSE2 for -cpu max
tests/tcg/multiarch: Adjust sigbus.c
tests/tcg/aarch64: Use stz2g in mte-7.c
target/arm: Move mte check for store-exclusive
target/arm: Relax ordered/atomic alignment checks for LSE2
target/arm: Add SCTLR.nAA to TBFLAG_A64
target/arm: Check alignment in helper_mte_check
target/arm: Pass single_memop to gen_mte_checkN
target/arm: Pass memop to gen_mte_check1*
target/arm: Hoist finalize_memop out of do_fp_{ld, st}
target/arm: Hoist finalize_memop out of do_gpr_{ld, st}
target/arm: Load/store integer pair with one tcg operation
target/arm: Sink gen_mte_check1 into load/store_exclusive
target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r
target/arm: Use tcg_gen_qemu_st_i128 for STZG, STZ2G
target/arm: Use tcg_gen_qemu_{st, ld}_i128 for do_fp_{st, ld}
target/arm: Use tcg_gen_qemu_ld_i128 for LDXP
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
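
Several of the commits above ("Relax ordered/atomic alignment checks for LSE2", "Add SCTLR.nAA to TBFLAG_A64") implement the FEAT_LSE2 alignment rule: atomic and ordered accesses no longer have to be naturally aligned, but they must not cross a 16-byte boundary. A minimal sketch of that predicate, written here purely for illustration (the real run-time check is check_lse2_align() in the translate-a64.c hunk below; the helper name lse2_access_ok is invented):

```c
#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative only: the FEAT_LSE2 relaxation for AccType_ATOMIC and
 * AccType_ORDERED accesses.  The access need not be naturally aligned,
 * but it must fit entirely within one 16-byte window; otherwise an
 * alignment fault is raised (see AArch64.CheckAlignment()).
 */
static bool lse2_access_ok(uint64_t addr, unsigned size_bytes)
{
    unsigned offset = addr & 15;      /* position inside the 16-byte window */

    return offset + size_bytes <= 16; /* true iff no boundary is crossed */
}
```

This is the same condition the generated TCG code tests: it adds the low four address bits to the access size and branches past the fault helper when the sum is at most 16.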
Diffstat (limited to 'target')
| -rw-r--r-- | target/arm/cpu.c | 2 |
| -rw-r--r-- | target/arm/cpu.h | 16 |
| -rw-r--r-- | target/arm/debug_helper.c | 5 |
| -rw-r--r-- | target/arm/helper.c | 6 |
| -rw-r--r-- | target/arm/hvf/hvf.c | 750 |
| -rw-r--r-- | target/arm/hvf_arm.h | 7 |
| -rw-r--r-- | target/arm/hyp_gdbstub.c | 253 |
| -rw-r--r-- | target/arm/internals.h | 53 |
| -rw-r--r-- | target/arm/kvm64.c | 276 |
| -rw-r--r-- | target/arm/meson.build | 3 |
| -rw-r--r-- | target/arm/tcg/cpu64.c | 1 |
| -rw-r--r-- | target/arm/tcg/helper-a64.c | 7 |
| -rw-r--r-- | target/arm/tcg/helper-a64.h | 3 |
| -rw-r--r-- | target/arm/tcg/hflags.c | 6 |
| -rw-r--r-- | target/arm/tcg/mte_helper.c | 18 |
| -rw-r--r-- | target/arm/tcg/translate-a64.c | 477 |
| -rw-r--r-- | target/arm/tcg/translate-a64.h | 4 |
| -rw-r--r-- | target/arm/tcg/translate-sve.c | 106 |
| -rw-r--r-- | target/arm/tcg/translate.c | 1 |
| -rw-r--r-- | target/arm/tcg/translate.h | 65 |
| -rw-r--r-- | target/i386/hvf/hvf.c | 33 |
21 files changed, 1614 insertions, 478 deletions
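
The "allow DC CVA[D]P in user mode emulation" change (and the accompanying tests/tcg/aarch64 tests) means a linux-user binary can now issue these cache-maintenance instructions instead of hitting the undefined-instruction path; the helper.c hunk below drops the CONFIG_USER_ONLY guards around the DC CVAP/CVADP registers. A hypothetical stand-alone probe — not the actual QEMU test, with the HWCAP fallback bit positions taken from the Linux arm64 hwcap ABI as an assumption — might look like:

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/auxv.h>

#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP   (1UL << 16)  /* FEAT_DPB:  DC CVAP  (assumed bit) */
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1UL << 0)   /* FEAT_DPB2: DC CVADP (assumed bit) */
#endif

int main(void)
{
    char *buf = malloc(64);

    if (getauxval(AT_HWCAP) & HWCAP_DCPOP) {
        /* Clean the cache line containing buf to the point of persistence. */
        asm volatile("dc cvap, %0" : : "r"(buf) : "memory");
        puts("DC CVAP executed");
    }
    if (getauxval(AT_HWCAP2) & HWCAP2_DCPODP) {
        /* Clean to the point of deep persistence (FEAT_DPB2). */
        asm volatile("dc cvadp, %0" : : "r"(buf) : "memory");
        puts("DC CVADP executed");
    }
    free(buf);
    return 0;
}
```

Run under qemu-aarch64 with a CPU model that advertises FEAT_DPB/FEAT_DPB2 (for example -cpu max), both instructions should now complete rather than raise SIGILL.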
diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 5182ed0c91..4d5bb57f07 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -289,6 +289,8 @@ static void arm_cpu_reset_hold(Object *obj) * This is not yet exposed from the Linux kernel in any way. */ env->cp15.sctlr_el[1] |= SCTLR_TSCXT; + /* Disable access to Debug Communication Channel (DCC). */ + env->cp15.mdscr_el1 |= 1 << 12; #else /* Reset into the highest available EL */ if (arm_feature(env, ARM_FEATURE_EL3)) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index d469a2637b..36c608f0e6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -677,8 +677,16 @@ typedef struct CPUArchState { uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ } vfp; + uint64_t exclusive_addr; uint64_t exclusive_val; + /* + * Contains the 'val' for the second 64-bit register of LDXP, which comes + * from the higher address, not the high part of a complete 128-bit value. + * In some ways it might be more convenient to record the exclusive value + * as the low and high halves of a 128 bit data value, but the current + * semantics of these fields are baked into the migration format. + */ uint64_t exclusive_high; /* iwMMXt coprocessor state. */ @@ -1240,7 +1248,7 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_D (1U << 5) /* up to v5; RAO in v6 */ #define SCTLR_CP15BEN (1U << 5) /* v7 onward */ #define SCTLR_L (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */ -#define SCTLR_nAA (1U << 6) /* when v8.4-LSE is implemented */ +#define SCTLR_nAA (1U << 6) /* when FEAT_LSE2 is implemented */ #define SCTLR_B (1U << 7) /* up to v6; RAZ in v7 */ #define SCTLR_ITD (1U << 7) /* v8 onward */ #define SCTLR_S (1U << 8) /* up to v6; RAZ in v7 */ @@ -3036,6 +3044,7 @@ FIELD(TBFLAG_A64, SVL, 24, 4) /* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */ FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1) FIELD(TBFLAG_A64, FGT_ERET, 29, 1) +FIELD(TBFLAG_A64, NAA, 30, 1) /* * Helpers for using the above. @@ -3843,6 +3852,11 @@ static inline bool isar_feature_aa64_st(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; } +static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0; +} + static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0; diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index d41cc643b1..8362462a07 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -842,12 +842,14 @@ static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri, * is implemented then these are controlled by MDCR_EL2.TDCC for * EL2 and MDCR_EL3.TDCC for EL3. They are also controlled by * the general debug access trap bits MDCR_EL2.TDA and MDCR_EL3.TDA. + * For EL0, they are also controlled by MDSCR_EL1.TDCC. 
*/ static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { int el = arm_current_el(env); uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); + bool mdscr_el1_tdcc = extract32(env->cp15.mdscr_el1, 12, 1); bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) || (arm_hcr_el2_eff(env) & HCR_TGE); bool mdcr_el2_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) && @@ -855,6 +857,9 @@ static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri, bool mdcr_el3_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) && (env->cp15.mdcr_el3 & MDCR_TDCC); + if (el < 1 && mdscr_el1_tdcc) { + return CP_ACCESS_TRAP; + } if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) { return CP_ACCESS_TRAP_EL2; } diff --git a/target/arm/helper.c b/target/arm/helper.c index 0b7fd2e7e6..d4bee43bd0 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -7405,7 +7405,6 @@ static const ARMCPRegInfo rndr_reginfo[] = { .access = PL0_R, .readfn = rndr_readfn }, }; -#ifndef CONFIG_USER_ONLY static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, uint64_t value) { @@ -7420,6 +7419,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, /* This won't be crossing page boundaries */ haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC()); if (haddr) { +#ifndef CONFIG_USER_ONLY ram_addr_t offset; MemoryRegion *mr; @@ -7430,6 +7430,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, if (mr) { memory_region_writeback(mr, offset, dline_size); } +#endif /*CONFIG_USER_ONLY*/ } } @@ -7448,7 +7449,6 @@ static const ARMCPRegInfo dcpodp_reg[] = { .fgt = FGT_DCCVADP, .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn }, }; -#endif /*CONFIG_USER_ONLY*/ static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -9092,7 +9092,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_tlbios, cpu)) { define_arm_cp_regs(cpu, tlbios_reginfo); } -#ifndef CONFIG_USER_ONLY /* Data Cache clean instructions up to PoP */ if (cpu_isar_feature(aa64_dcpop, cpu)) { define_one_arm_cp_reg(cpu, dcpop_reg); @@ -9101,7 +9100,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, dcpodp_reg); } } -#endif /*CONFIG_USER_ONLY*/ /* * If full MTE is enabled, add all of the system registers. 
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 5900dc788f..8f72624586 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -31,6 +31,118 @@ #include "trace/trace-target_arm_hvf.h" #include "migration/vmstate.h" +#include "exec/gdbstub.h" + +#define MDSCR_EL1_SS_SHIFT 0 +#define MDSCR_EL1_MDE_SHIFT 15 + +static uint16_t dbgbcr_regs[] = { + HV_SYS_REG_DBGBCR0_EL1, + HV_SYS_REG_DBGBCR1_EL1, + HV_SYS_REG_DBGBCR2_EL1, + HV_SYS_REG_DBGBCR3_EL1, + HV_SYS_REG_DBGBCR4_EL1, + HV_SYS_REG_DBGBCR5_EL1, + HV_SYS_REG_DBGBCR6_EL1, + HV_SYS_REG_DBGBCR7_EL1, + HV_SYS_REG_DBGBCR8_EL1, + HV_SYS_REG_DBGBCR9_EL1, + HV_SYS_REG_DBGBCR10_EL1, + HV_SYS_REG_DBGBCR11_EL1, + HV_SYS_REG_DBGBCR12_EL1, + HV_SYS_REG_DBGBCR13_EL1, + HV_SYS_REG_DBGBCR14_EL1, + HV_SYS_REG_DBGBCR15_EL1, +}; +static uint16_t dbgbvr_regs[] = { + HV_SYS_REG_DBGBVR0_EL1, + HV_SYS_REG_DBGBVR1_EL1, + HV_SYS_REG_DBGBVR2_EL1, + HV_SYS_REG_DBGBVR3_EL1, + HV_SYS_REG_DBGBVR4_EL1, + HV_SYS_REG_DBGBVR5_EL1, + HV_SYS_REG_DBGBVR6_EL1, + HV_SYS_REG_DBGBVR7_EL1, + HV_SYS_REG_DBGBVR8_EL1, + HV_SYS_REG_DBGBVR9_EL1, + HV_SYS_REG_DBGBVR10_EL1, + HV_SYS_REG_DBGBVR11_EL1, + HV_SYS_REG_DBGBVR12_EL1, + HV_SYS_REG_DBGBVR13_EL1, + HV_SYS_REG_DBGBVR14_EL1, + HV_SYS_REG_DBGBVR15_EL1, +}; +static uint16_t dbgwcr_regs[] = { + HV_SYS_REG_DBGWCR0_EL1, + HV_SYS_REG_DBGWCR1_EL1, + HV_SYS_REG_DBGWCR2_EL1, + HV_SYS_REG_DBGWCR3_EL1, + HV_SYS_REG_DBGWCR4_EL1, + HV_SYS_REG_DBGWCR5_EL1, + HV_SYS_REG_DBGWCR6_EL1, + HV_SYS_REG_DBGWCR7_EL1, + HV_SYS_REG_DBGWCR8_EL1, + HV_SYS_REG_DBGWCR9_EL1, + HV_SYS_REG_DBGWCR10_EL1, + HV_SYS_REG_DBGWCR11_EL1, + HV_SYS_REG_DBGWCR12_EL1, + HV_SYS_REG_DBGWCR13_EL1, + HV_SYS_REG_DBGWCR14_EL1, + HV_SYS_REG_DBGWCR15_EL1, +}; +static uint16_t dbgwvr_regs[] = { + HV_SYS_REG_DBGWVR0_EL1, + HV_SYS_REG_DBGWVR1_EL1, + HV_SYS_REG_DBGWVR2_EL1, + HV_SYS_REG_DBGWVR3_EL1, + HV_SYS_REG_DBGWVR4_EL1, + HV_SYS_REG_DBGWVR5_EL1, + HV_SYS_REG_DBGWVR6_EL1, + HV_SYS_REG_DBGWVR7_EL1, + HV_SYS_REG_DBGWVR8_EL1, + HV_SYS_REG_DBGWVR9_EL1, + HV_SYS_REG_DBGWVR10_EL1, + HV_SYS_REG_DBGWVR11_EL1, + HV_SYS_REG_DBGWVR12_EL1, + HV_SYS_REG_DBGWVR13_EL1, + HV_SYS_REG_DBGWVR14_EL1, + HV_SYS_REG_DBGWVR15_EL1, +}; + +static inline int hvf_arm_num_brps(hv_vcpu_config_t config) +{ + uint64_t val; + hv_return_t ret; + ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1, + &val); + assert_hvf_ok(ret); + return FIELD_EX64(val, ID_AA64DFR0, BRPS) + 1; +} + +static inline int hvf_arm_num_wrps(hv_vcpu_config_t config) +{ + uint64_t val; + hv_return_t ret; + ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1, + &val); + assert_hvf_ok(ret); + return FIELD_EX64(val, ID_AA64DFR0, WRPS) + 1; +} + +void hvf_arm_init_debug(void) +{ + hv_vcpu_config_t config; + config = hv_vcpu_config_create(); + + max_hw_bps = hvf_arm_num_brps(config); + hw_breakpoints = + g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); + + max_hw_wps = hvf_arm_num_wrps(config); + hw_watchpoints = + g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); +} + #define HVF_SYSREG(crn, crm, op0, op1, op2) \ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) #define PL1_WRITE_MASK 0x4 @@ -107,6 +219,72 @@ #define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 12, 11, 5) #define SYSREG_ICC_SRE_EL1 SYSREG(3, 0, 12, 12, 5) +#define SYSREG_MDSCR_EL1 SYSREG(2, 0, 0, 2, 2) +#define SYSREG_DBGBVR0_EL1 SYSREG(2, 0, 0, 0, 4) +#define SYSREG_DBGBCR0_EL1 SYSREG(2, 0, 0, 0, 5) +#define SYSREG_DBGWVR0_EL1 SYSREG(2, 0, 0, 0, 6) +#define SYSREG_DBGWCR0_EL1 
SYSREG(2, 0, 0, 0, 7) +#define SYSREG_DBGBVR1_EL1 SYSREG(2, 0, 0, 1, 4) +#define SYSREG_DBGBCR1_EL1 SYSREG(2, 0, 0, 1, 5) +#define SYSREG_DBGWVR1_EL1 SYSREG(2, 0, 0, 1, 6) +#define SYSREG_DBGWCR1_EL1 SYSREG(2, 0, 0, 1, 7) +#define SYSREG_DBGBVR2_EL1 SYSREG(2, 0, 0, 2, 4) +#define SYSREG_DBGBCR2_EL1 SYSREG(2, 0, 0, 2, 5) +#define SYSREG_DBGWVR2_EL1 SYSREG(2, 0, 0, 2, 6) +#define SYSREG_DBGWCR2_EL1 SYSREG(2, 0, 0, 2, 7) +#define SYSREG_DBGBVR3_EL1 SYSREG(2, 0, 0, 3, 4) +#define SYSREG_DBGBCR3_EL1 SYSREG(2, 0, 0, 3, 5) +#define SYSREG_DBGWVR3_EL1 SYSREG(2, 0, 0, 3, 6) +#define SYSREG_DBGWCR3_EL1 SYSREG(2, 0, 0, 3, 7) +#define SYSREG_DBGBVR4_EL1 SYSREG(2, 0, 0, 4, 4) +#define SYSREG_DBGBCR4_EL1 SYSREG(2, 0, 0, 4, 5) +#define SYSREG_DBGWVR4_EL1 SYSREG(2, 0, 0, 4, 6) +#define SYSREG_DBGWCR4_EL1 SYSREG(2, 0, 0, 4, 7) +#define SYSREG_DBGBVR5_EL1 SYSREG(2, 0, 0, 5, 4) +#define SYSREG_DBGBCR5_EL1 SYSREG(2, 0, 0, 5, 5) +#define SYSREG_DBGWVR5_EL1 SYSREG(2, 0, 0, 5, 6) +#define SYSREG_DBGWCR5_EL1 SYSREG(2, 0, 0, 5, 7) +#define SYSREG_DBGBVR6_EL1 SYSREG(2, 0, 0, 6, 4) +#define SYSREG_DBGBCR6_EL1 SYSREG(2, 0, 0, 6, 5) +#define SYSREG_DBGWVR6_EL1 SYSREG(2, 0, 0, 6, 6) +#define SYSREG_DBGWCR6_EL1 SYSREG(2, 0, 0, 6, 7) +#define SYSREG_DBGBVR7_EL1 SYSREG(2, 0, 0, 7, 4) +#define SYSREG_DBGBCR7_EL1 SYSREG(2, 0, 0, 7, 5) +#define SYSREG_DBGWVR7_EL1 SYSREG(2, 0, 0, 7, 6) +#define SYSREG_DBGWCR7_EL1 SYSREG(2, 0, 0, 7, 7) +#define SYSREG_DBGBVR8_EL1 SYSREG(2, 0, 0, 8, 4) +#define SYSREG_DBGBCR8_EL1 SYSREG(2, 0, 0, 8, 5) +#define SYSREG_DBGWVR8_EL1 SYSREG(2, 0, 0, 8, 6) +#define SYSREG_DBGWCR8_EL1 SYSREG(2, 0, 0, 8, 7) +#define SYSREG_DBGBVR9_EL1 SYSREG(2, 0, 0, 9, 4) +#define SYSREG_DBGBCR9_EL1 SYSREG(2, 0, 0, 9, 5) +#define SYSREG_DBGWVR9_EL1 SYSREG(2, 0, 0, 9, 6) +#define SYSREG_DBGWCR9_EL1 SYSREG(2, 0, 0, 9, 7) +#define SYSREG_DBGBVR10_EL1 SYSREG(2, 0, 0, 10, 4) +#define SYSREG_DBGBCR10_EL1 SYSREG(2, 0, 0, 10, 5) +#define SYSREG_DBGWVR10_EL1 SYSREG(2, 0, 0, 10, 6) +#define SYSREG_DBGWCR10_EL1 SYSREG(2, 0, 0, 10, 7) +#define SYSREG_DBGBVR11_EL1 SYSREG(2, 0, 0, 11, 4) +#define SYSREG_DBGBCR11_EL1 SYSREG(2, 0, 0, 11, 5) +#define SYSREG_DBGWVR11_EL1 SYSREG(2, 0, 0, 11, 6) +#define SYSREG_DBGWCR11_EL1 SYSREG(2, 0, 0, 11, 7) +#define SYSREG_DBGBVR12_EL1 SYSREG(2, 0, 0, 12, 4) +#define SYSREG_DBGBCR12_EL1 SYSREG(2, 0, 0, 12, 5) +#define SYSREG_DBGWVR12_EL1 SYSREG(2, 0, 0, 12, 6) +#define SYSREG_DBGWCR12_EL1 SYSREG(2, 0, 0, 12, 7) +#define SYSREG_DBGBVR13_EL1 SYSREG(2, 0, 0, 13, 4) +#define SYSREG_DBGBCR13_EL1 SYSREG(2, 0, 0, 13, 5) +#define SYSREG_DBGWVR13_EL1 SYSREG(2, 0, 0, 13, 6) +#define SYSREG_DBGWCR13_EL1 SYSREG(2, 0, 0, 13, 7) +#define SYSREG_DBGBVR14_EL1 SYSREG(2, 0, 0, 14, 4) +#define SYSREG_DBGBCR14_EL1 SYSREG(2, 0, 0, 14, 5) +#define SYSREG_DBGWVR14_EL1 SYSREG(2, 0, 0, 14, 6) +#define SYSREG_DBGWCR14_EL1 SYSREG(2, 0, 0, 14, 7) +#define SYSREG_DBGBVR15_EL1 SYSREG(2, 0, 0, 15, 4) +#define SYSREG_DBGBCR15_EL1 SYSREG(2, 0, 0, 15, 5) +#define SYSREG_DBGWVR15_EL1 SYSREG(2, 0, 0, 15, 6) +#define SYSREG_DBGWCR15_EL1 SYSREG(2, 0, 0, 15, 7) + #define WFX_IS_WFE (1 << 0) #define TMR_CTL_ENABLE (1 << 0) @@ -397,6 +575,92 @@ int hvf_get_registers(CPUState *cpu) continue; } + if (cpu->hvf->guest_debug_enabled) { + /* Handle debug registers */ + switch (hvf_sreg_match[i].reg) { + case HV_SYS_REG_DBGBVR0_EL1: + case HV_SYS_REG_DBGBCR0_EL1: + case HV_SYS_REG_DBGWVR0_EL1: + case HV_SYS_REG_DBGWCR0_EL1: + case HV_SYS_REG_DBGBVR1_EL1: + case HV_SYS_REG_DBGBCR1_EL1: + case HV_SYS_REG_DBGWVR1_EL1: + case HV_SYS_REG_DBGWCR1_EL1: 
+ case HV_SYS_REG_DBGBVR2_EL1: + case HV_SYS_REG_DBGBCR2_EL1: + case HV_SYS_REG_DBGWVR2_EL1: + case HV_SYS_REG_DBGWCR2_EL1: + case HV_SYS_REG_DBGBVR3_EL1: + case HV_SYS_REG_DBGBCR3_EL1: + case HV_SYS_REG_DBGWVR3_EL1: + case HV_SYS_REG_DBGWCR3_EL1: + case HV_SYS_REG_DBGBVR4_EL1: + case HV_SYS_REG_DBGBCR4_EL1: + case HV_SYS_REG_DBGWVR4_EL1: + case HV_SYS_REG_DBGWCR4_EL1: + case HV_SYS_REG_DBGBVR5_EL1: + case HV_SYS_REG_DBGBCR5_EL1: + case HV_SYS_REG_DBGWVR5_EL1: + case HV_SYS_REG_DBGWCR5_EL1: + case HV_SYS_REG_DBGBVR6_EL1: + case HV_SYS_REG_DBGBCR6_EL1: + case HV_SYS_REG_DBGWVR6_EL1: + case HV_SYS_REG_DBGWCR6_EL1: + case HV_SYS_REG_DBGBVR7_EL1: + case HV_SYS_REG_DBGBCR7_EL1: + case HV_SYS_REG_DBGWVR7_EL1: + case HV_SYS_REG_DBGWCR7_EL1: + case HV_SYS_REG_DBGBVR8_EL1: + case HV_SYS_REG_DBGBCR8_EL1: + case HV_SYS_REG_DBGWVR8_EL1: + case HV_SYS_REG_DBGWCR8_EL1: + case HV_SYS_REG_DBGBVR9_EL1: + case HV_SYS_REG_DBGBCR9_EL1: + case HV_SYS_REG_DBGWVR9_EL1: + case HV_SYS_REG_DBGWCR9_EL1: + case HV_SYS_REG_DBGBVR10_EL1: + case HV_SYS_REG_DBGBCR10_EL1: + case HV_SYS_REG_DBGWVR10_EL1: + case HV_SYS_REG_DBGWCR10_EL1: + case HV_SYS_REG_DBGBVR11_EL1: + case HV_SYS_REG_DBGBCR11_EL1: + case HV_SYS_REG_DBGWVR11_EL1: + case HV_SYS_REG_DBGWCR11_EL1: + case HV_SYS_REG_DBGBVR12_EL1: + case HV_SYS_REG_DBGBCR12_EL1: + case HV_SYS_REG_DBGWVR12_EL1: + case HV_SYS_REG_DBGWCR12_EL1: + case HV_SYS_REG_DBGBVR13_EL1: + case HV_SYS_REG_DBGBCR13_EL1: + case HV_SYS_REG_DBGWVR13_EL1: + case HV_SYS_REG_DBGWCR13_EL1: + case HV_SYS_REG_DBGBVR14_EL1: + case HV_SYS_REG_DBGBCR14_EL1: + case HV_SYS_REG_DBGWVR14_EL1: + case HV_SYS_REG_DBGWCR14_EL1: + case HV_SYS_REG_DBGBVR15_EL1: + case HV_SYS_REG_DBGBCR15_EL1: + case HV_SYS_REG_DBGWVR15_EL1: + case HV_SYS_REG_DBGWCR15_EL1: { + /* + * If the guest is being debugged, the vCPU's debug registers + * are holding the gdbstub's view of the registers (set in + * hvf_arch_update_guest_debug()). + * Since the environment is used to store only the guest's view + * of the registers, don't update it with the values from the + * vCPU but simply keep the values from the previous + * environment. 
+ */ + const ARMCPRegInfo *ri; + ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key); + val = read_raw_cp_reg(env, ri); + + arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + continue; + } + } + } + ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, &val); assert_hvf_ok(ret); @@ -448,6 +712,82 @@ int hvf_put_registers(CPUState *cpu) continue; } + if (cpu->hvf->guest_debug_enabled) { + /* Handle debug registers */ + switch (hvf_sreg_match[i].reg) { + case HV_SYS_REG_DBGBVR0_EL1: + case HV_SYS_REG_DBGBCR0_EL1: + case HV_SYS_REG_DBGWVR0_EL1: + case HV_SYS_REG_DBGWCR0_EL1: + case HV_SYS_REG_DBGBVR1_EL1: + case HV_SYS_REG_DBGBCR1_EL1: + case HV_SYS_REG_DBGWVR1_EL1: + case HV_SYS_REG_DBGWCR1_EL1: + case HV_SYS_REG_DBGBVR2_EL1: + case HV_SYS_REG_DBGBCR2_EL1: + case HV_SYS_REG_DBGWVR2_EL1: + case HV_SYS_REG_DBGWCR2_EL1: + case HV_SYS_REG_DBGBVR3_EL1: + case HV_SYS_REG_DBGBCR3_EL1: + case HV_SYS_REG_DBGWVR3_EL1: + case HV_SYS_REG_DBGWCR3_EL1: + case HV_SYS_REG_DBGBVR4_EL1: + case HV_SYS_REG_DBGBCR4_EL1: + case HV_SYS_REG_DBGWVR4_EL1: + case HV_SYS_REG_DBGWCR4_EL1: + case HV_SYS_REG_DBGBVR5_EL1: + case HV_SYS_REG_DBGBCR5_EL1: + case HV_SYS_REG_DBGWVR5_EL1: + case HV_SYS_REG_DBGWCR5_EL1: + case HV_SYS_REG_DBGBVR6_EL1: + case HV_SYS_REG_DBGBCR6_EL1: + case HV_SYS_REG_DBGWVR6_EL1: + case HV_SYS_REG_DBGWCR6_EL1: + case HV_SYS_REG_DBGBVR7_EL1: + case HV_SYS_REG_DBGBCR7_EL1: + case HV_SYS_REG_DBGWVR7_EL1: + case HV_SYS_REG_DBGWCR7_EL1: + case HV_SYS_REG_DBGBVR8_EL1: + case HV_SYS_REG_DBGBCR8_EL1: + case HV_SYS_REG_DBGWVR8_EL1: + case HV_SYS_REG_DBGWCR8_EL1: + case HV_SYS_REG_DBGBVR9_EL1: + case HV_SYS_REG_DBGBCR9_EL1: + case HV_SYS_REG_DBGWVR9_EL1: + case HV_SYS_REG_DBGWCR9_EL1: + case HV_SYS_REG_DBGBVR10_EL1: + case HV_SYS_REG_DBGBCR10_EL1: + case HV_SYS_REG_DBGWVR10_EL1: + case HV_SYS_REG_DBGWCR10_EL1: + case HV_SYS_REG_DBGBVR11_EL1: + case HV_SYS_REG_DBGBCR11_EL1: + case HV_SYS_REG_DBGWVR11_EL1: + case HV_SYS_REG_DBGWCR11_EL1: + case HV_SYS_REG_DBGBVR12_EL1: + case HV_SYS_REG_DBGBCR12_EL1: + case HV_SYS_REG_DBGWVR12_EL1: + case HV_SYS_REG_DBGWCR12_EL1: + case HV_SYS_REG_DBGBVR13_EL1: + case HV_SYS_REG_DBGBCR13_EL1: + case HV_SYS_REG_DBGWVR13_EL1: + case HV_SYS_REG_DBGWCR13_EL1: + case HV_SYS_REG_DBGBVR14_EL1: + case HV_SYS_REG_DBGBCR14_EL1: + case HV_SYS_REG_DBGWVR14_EL1: + case HV_SYS_REG_DBGWCR14_EL1: + case HV_SYS_REG_DBGBVR15_EL1: + case HV_SYS_REG_DBGBCR15_EL1: + case HV_SYS_REG_DBGWVR15_EL1: + case HV_SYS_REG_DBGWCR15_EL1: + /* + * If the guest is being debugged, the vCPU's debug registers + * are already holding the gdbstub's view of the registers (set + * in hvf_arch_update_guest_debug()). 
+ */ + continue; + } + } + val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx]; ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, val); assert_hvf_ok(ret); @@ -933,6 +1273,78 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); } break; + case SYSREG_DBGBVR0_EL1: + case SYSREG_DBGBVR1_EL1: + case SYSREG_DBGBVR2_EL1: + case SYSREG_DBGBVR3_EL1: + case SYSREG_DBGBVR4_EL1: + case SYSREG_DBGBVR5_EL1: + case SYSREG_DBGBVR6_EL1: + case SYSREG_DBGBVR7_EL1: + case SYSREG_DBGBVR8_EL1: + case SYSREG_DBGBVR9_EL1: + case SYSREG_DBGBVR10_EL1: + case SYSREG_DBGBVR11_EL1: + case SYSREG_DBGBVR12_EL1: + case SYSREG_DBGBVR13_EL1: + case SYSREG_DBGBVR14_EL1: + case SYSREG_DBGBVR15_EL1: + val = env->cp15.dbgbvr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGBCR0_EL1: + case SYSREG_DBGBCR1_EL1: + case SYSREG_DBGBCR2_EL1: + case SYSREG_DBGBCR3_EL1: + case SYSREG_DBGBCR4_EL1: + case SYSREG_DBGBCR5_EL1: + case SYSREG_DBGBCR6_EL1: + case SYSREG_DBGBCR7_EL1: + case SYSREG_DBGBCR8_EL1: + case SYSREG_DBGBCR9_EL1: + case SYSREG_DBGBCR10_EL1: + case SYSREG_DBGBCR11_EL1: + case SYSREG_DBGBCR12_EL1: + case SYSREG_DBGBCR13_EL1: + case SYSREG_DBGBCR14_EL1: + case SYSREG_DBGBCR15_EL1: + val = env->cp15.dbgbcr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGWVR0_EL1: + case SYSREG_DBGWVR1_EL1: + case SYSREG_DBGWVR2_EL1: + case SYSREG_DBGWVR3_EL1: + case SYSREG_DBGWVR4_EL1: + case SYSREG_DBGWVR5_EL1: + case SYSREG_DBGWVR6_EL1: + case SYSREG_DBGWVR7_EL1: + case SYSREG_DBGWVR8_EL1: + case SYSREG_DBGWVR9_EL1: + case SYSREG_DBGWVR10_EL1: + case SYSREG_DBGWVR11_EL1: + case SYSREG_DBGWVR12_EL1: + case SYSREG_DBGWVR13_EL1: + case SYSREG_DBGWVR14_EL1: + case SYSREG_DBGWVR15_EL1: + val = env->cp15.dbgwvr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGWCR0_EL1: + case SYSREG_DBGWCR1_EL1: + case SYSREG_DBGWCR2_EL1: + case SYSREG_DBGWCR3_EL1: + case SYSREG_DBGWCR4_EL1: + case SYSREG_DBGWCR5_EL1: + case SYSREG_DBGWCR6_EL1: + case SYSREG_DBGWCR7_EL1: + case SYSREG_DBGWCR8_EL1: + case SYSREG_DBGWCR9_EL1: + case SYSREG_DBGWCR10_EL1: + case SYSREG_DBGWCR11_EL1: + case SYSREG_DBGWCR12_EL1: + case SYSREG_DBGWCR13_EL1: + case SYSREG_DBGWCR14_EL1: + case SYSREG_DBGWCR15_EL1: + val = env->cp15.dbgwcr[SYSREG_CRM(reg)]; + break; default: if (is_id_sysreg(reg)) { /* ID system registers read as RES0 */ @@ -1172,6 +1584,81 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); } break; + case SYSREG_MDSCR_EL1: + env->cp15.mdscr_el1 = val; + break; + case SYSREG_DBGBVR0_EL1: + case SYSREG_DBGBVR1_EL1: + case SYSREG_DBGBVR2_EL1: + case SYSREG_DBGBVR3_EL1: + case SYSREG_DBGBVR4_EL1: + case SYSREG_DBGBVR5_EL1: + case SYSREG_DBGBVR6_EL1: + case SYSREG_DBGBVR7_EL1: + case SYSREG_DBGBVR8_EL1: + case SYSREG_DBGBVR9_EL1: + case SYSREG_DBGBVR10_EL1: + case SYSREG_DBGBVR11_EL1: + case SYSREG_DBGBVR12_EL1: + case SYSREG_DBGBVR13_EL1: + case SYSREG_DBGBVR14_EL1: + case SYSREG_DBGBVR15_EL1: + env->cp15.dbgbvr[SYSREG_CRM(reg)] = val; + break; + case SYSREG_DBGBCR0_EL1: + case SYSREG_DBGBCR1_EL1: + case SYSREG_DBGBCR2_EL1: + case SYSREG_DBGBCR3_EL1: + case SYSREG_DBGBCR4_EL1: + case SYSREG_DBGBCR5_EL1: + case SYSREG_DBGBCR6_EL1: + case SYSREG_DBGBCR7_EL1: + case SYSREG_DBGBCR8_EL1: + case SYSREG_DBGBCR9_EL1: + case SYSREG_DBGBCR10_EL1: + case SYSREG_DBGBCR11_EL1: + case SYSREG_DBGBCR12_EL1: + case SYSREG_DBGBCR13_EL1: + case SYSREG_DBGBCR14_EL1: + case SYSREG_DBGBCR15_EL1: + env->cp15.dbgbcr[SYSREG_CRM(reg)] = 
val; + break; + case SYSREG_DBGWVR0_EL1: + case SYSREG_DBGWVR1_EL1: + case SYSREG_DBGWVR2_EL1: + case SYSREG_DBGWVR3_EL1: + case SYSREG_DBGWVR4_EL1: + case SYSREG_DBGWVR5_EL1: + case SYSREG_DBGWVR6_EL1: + case SYSREG_DBGWVR7_EL1: + case SYSREG_DBGWVR8_EL1: + case SYSREG_DBGWVR9_EL1: + case SYSREG_DBGWVR10_EL1: + case SYSREG_DBGWVR11_EL1: + case SYSREG_DBGWVR12_EL1: + case SYSREG_DBGWVR13_EL1: + case SYSREG_DBGWVR14_EL1: + case SYSREG_DBGWVR15_EL1: + env->cp15.dbgwvr[SYSREG_CRM(reg)] = val; + break; + case SYSREG_DBGWCR0_EL1: + case SYSREG_DBGWCR1_EL1: + case SYSREG_DBGWCR2_EL1: + case SYSREG_DBGWCR3_EL1: + case SYSREG_DBGWCR4_EL1: + case SYSREG_DBGWCR5_EL1: + case SYSREG_DBGWCR6_EL1: + case SYSREG_DBGWCR7_EL1: + case SYSREG_DBGWCR8_EL1: + case SYSREG_DBGWCR9_EL1: + case SYSREG_DBGWCR10_EL1: + case SYSREG_DBGWCR11_EL1: + case SYSREG_DBGWCR12_EL1: + case SYSREG_DBGWCR13_EL1: + case SYSREG_DBGWCR14_EL1: + case SYSREG_DBGWCR15_EL1: + env->cp15.dbgwcr[SYSREG_CRM(reg)] = val; + break; default: cpu_synchronize_state(cpu); trace_hvf_unhandled_sysreg_write(env->pc, reg, @@ -1317,11 +1804,13 @@ int hvf_vcpu_exec(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; + int ret; hv_vcpu_exit_t *hvf_exit = cpu->hvf->exit; hv_return_t r; bool advance_pc = false; - if (hvf_inject_interrupts(cpu)) { + if (!(cpu->singlestep_enabled & SSTEP_NOIRQ) && + hvf_inject_interrupts(cpu)) { return EXCP_INTERRUPT; } @@ -1339,6 +1828,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint64_t syndrome = hvf_exit->exception.syndrome; uint32_t ec = syn_get_ec(syndrome); + ret = 0; qemu_mutex_lock_iothread(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: @@ -1358,6 +1848,49 @@ int hvf_vcpu_exec(CPUState *cpu) hvf_sync_vtimer(cpu); switch (ec) { + case EC_SOFTWARESTEP: { + ret = EXCP_DEBUG; + + if (!cpu->singlestep_enabled) { + error_report("EC_SOFTWARESTEP but single-stepping not enabled"); + } + break; + } + case EC_AA64_BKPT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + if (!hvf_find_sw_breakpoint(cpu, env->pc)) { + /* Re-inject into the guest */ + ret = 0; + hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0)); + } + break; + } + case EC_BREAKPOINT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + if (!find_hw_breakpoint(cpu, env->pc)) { + error_report("EC_BREAKPOINT but unknown hw breakpoint"); + } + break; + } + case EC_WATCHPOINT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + CPUWatchpoint *wp = + find_hw_watchpoint(cpu, hvf_exit->exception.virtual_address); + if (!wp) { + error_report("EXCP_DEBUG but unknown hw watchpoint"); + } + cpu->watchpoint_hit = wp; + break; + } case EC_DATAABORT: { bool isv = syndrome & ARM_EL_ISV; bool iswrite = (syndrome >> 6) & 1; @@ -1462,9 +1995,14 @@ int hvf_vcpu_exec(CPUState *cpu) pc += 4; r = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_PC, pc); assert_hvf_ok(r); + + /* Handle single-stepping over instructions which trigger a VM exit */ + if (cpu->singlestep_enabled) { + ret = EXCP_DEBUG; + } } - return 0; + return ret; } static const VMStateDescription vmstate_hvf_vtimer = { @@ -1496,5 +2034,213 @@ int hvf_arch_init(void) hvf_state->vtimer_offset = mach_absolute_time(); vmstate_register(NULL, 0, &vmstate_hvf_vtimer, &vtimer); qemu_add_vm_change_state_handler(hvf_vm_state_change, &vtimer); + + hvf_arm_init_debug(); + + return 0; +} + +static const uint32_t brk_insn = 0xd4200000; + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 
0) || + cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { + return -EINVAL; + } return 0; } + +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + static uint32_t brk; + + if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&brk, 4, 0) || + brk != brk_insn || + cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { + return -EINVAL; + } + return 0; +} + +int hvf_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return insert_hw_breakpoint(addr); + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return insert_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +int hvf_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return delete_hw_breakpoint(addr); + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return delete_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +void hvf_arch_remove_all_hw_breakpoints(void) +{ + if (cur_hw_wps > 0) { + g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); + } + if (cur_hw_bps > 0) { + g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); + } +} + +/* + * Update the vCPU with the gdbstub's view of debug registers. This view + * consists of all hardware breakpoints and watchpoints inserted so far while + * debugging the guest. + */ +static void hvf_put_gdbstub_debug_registers(CPUState *cpu) +{ + hv_return_t r = HV_SUCCESS; + int i; + + for (i = 0; i < cur_hw_bps; i++) { + HWBreakpoint *bp = get_hw_bp(i); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbcr_regs[i], bp->bcr); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbvr_regs[i], bp->bvr); + assert_hvf_ok(r); + } + for (i = cur_hw_bps; i < max_hw_bps; i++) { + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbcr_regs[i], 0); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbvr_regs[i], 0); + assert_hvf_ok(r); + } + + for (i = 0; i < cur_hw_wps; i++) { + HWWatchpoint *wp = get_hw_wp(i); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwcr_regs[i], wp->wcr); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwvr_regs[i], wp->wvr); + assert_hvf_ok(r); + } + for (i = cur_hw_wps; i < max_hw_wps; i++) { + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwcr_regs[i], 0); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwvr_regs[i], 0); + assert_hvf_ok(r); + } +} + +/* + * Update the vCPU with the guest's view of debug registers. This view is kept + * in the environment at all times. 
+ */ +static void hvf_put_guest_debug_registers(CPUState *cpu) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + hv_return_t r = HV_SUCCESS; + int i; + + for (i = 0; i < max_hw_bps; i++) { + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbcr_regs[i], + env->cp15.dbgbcr[i]); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgbvr_regs[i], + env->cp15.dbgbvr[i]); + assert_hvf_ok(r); + } + + for (i = 0; i < max_hw_wps; i++) { + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwcr_regs[i], + env->cp15.dbgwcr[i]); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->hvf->fd, dbgwvr_regs[i], + env->cp15.dbgwvr[i]); + assert_hvf_ok(r); + } +} + +static inline bool hvf_arm_hw_debug_active(CPUState *cpu) +{ + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); +} + +static void hvf_arch_set_traps(void) +{ + CPUState *cpu; + bool should_enable_traps = false; + hv_return_t r = HV_SUCCESS; + + /* Check whether guest debugging is enabled for at least one vCPU; if it + * is, enable exiting the guest on all vCPUs */ + CPU_FOREACH(cpu) { + should_enable_traps |= cpu->hvf->guest_debug_enabled; + } + CPU_FOREACH(cpu) { + /* Set whether debug exceptions exit the guest */ + r = hv_vcpu_set_trap_debug_exceptions(cpu->hvf->fd, + should_enable_traps); + assert_hvf_ok(r); + + /* Set whether accesses to debug registers exit the guest */ + r = hv_vcpu_set_trap_debug_reg_accesses(cpu->hvf->fd, + should_enable_traps); + assert_hvf_ok(r); + } +} + +void hvf_arch_update_guest_debug(CPUState *cpu) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + /* Check whether guest debugging is enabled */ + cpu->hvf->guest_debug_enabled = cpu->singlestep_enabled || + hvf_sw_breakpoints_active(cpu) || + hvf_arm_hw_debug_active(cpu); + + /* Update debug registers */ + if (cpu->hvf->guest_debug_enabled) { + hvf_put_gdbstub_debug_registers(cpu); + } else { + hvf_put_guest_debug_registers(cpu); + } + + cpu_synchronize_state(cpu); + + /* Enable/disable single-stepping */ + if (cpu->singlestep_enabled) { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 1); + pstate_write(env, pstate_read(env) | PSTATE_SS); + } else { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 0); + } + + /* Enable/disable Breakpoint exceptions */ + if (hvf_arm_hw_debug_active(cpu)) { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 1); + } else { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0); + } + + hvf_arch_set_traps(); +} + +inline bool hvf_arch_supports_guest_debug(void) +{ + return true; +} diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index 9a9d1a0bf5..e848c1d27d 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -13,6 +13,13 @@ #include "cpu.h" +/** + * hvf_arm_init_debug() - initialize guest debug capabilities + * + * Should be called only once before using guest debug capabilities. + */ +void hvf_arm_init_debug(void); + void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); #endif diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c new file mode 100644 index 0000000000..ebde2899cd --- /dev/null +++ b/target/arm/hyp_gdbstub.c @@ -0,0 +1,253 @@ +/* + * ARM implementation of KVM and HVF hooks, 64 bit specific code + * + * Copyright Mian-M. Hamayun 2013, Virtual Open Systems + * Copyright Alex Bennée 2014, Linaro + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/gdbstub.h" + +/* Maximum and current break/watch point counts */ +int max_hw_bps, max_hw_wps; +GArray *hw_breakpoints, *hw_watchpoints; + +/** + * insert_hw_breakpoint() + * @addr: address of breakpoint + * + * See ARM ARM D2.9.1 for details but here we are only going to create + * simple un-linked breakpoints (i.e. we don't chain breakpoints + * together to match address and context or vmid). The hardware is + * capable of fancier matching but that will require exposing that + * fanciness to GDB's interface + * + * DBGBCR<n>_EL1, Debug Breakpoint Control Registers + * + * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0 + * +------+------+-------+-----+----+------+-----+------+-----+---+ + * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E | + * +------+------+-------+-----+----+------+-----+------+-----+---+ + * + * BT: Breakpoint type (0 = unlinked address match) + * LBN: Linked BP number (0 = unused) + * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12) + * BAS: Byte Address Select (RES1 for AArch64) + * E: Enable bit + * + * DBGBVR<n>_EL1, Debug Breakpoint Value Registers + * + * 63 53 52 49 48 2 1 0 + * +------+-----------+----------+-----+ + * | RESS | VA[52:49] | VA[48:2] | 0 0 | + * +------+-----------+----------+-----+ + * + * Depending on the addressing mode bits the top bits of the register + * are a sign extension of the highest applicable VA bit. Some + * versions of GDB don't do it correctly so we ensure they are correct + * here so future PC comparisons will work properly. + */ + +int insert_hw_breakpoint(target_ulong addr) +{ + HWBreakpoint brk = { + .bcr = 0x1, /* BCR E=1, enable */ + .bvr = sextract64(addr, 0, 53) + }; + + if (cur_hw_bps >= max_hw_bps) { + return -ENOBUFS; + } + + brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */ + brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */ + + g_array_append_val(hw_breakpoints, brk); + + return 0; +} + +/** + * delete_hw_breakpoint() + * @pc: address of breakpoint + * + * Delete a breakpoint and shuffle any above down + */ + +int delete_hw_breakpoint(target_ulong pc) +{ + int i; + for (i = 0; i < hw_breakpoints->len; i++) { + HWBreakpoint *brk = get_hw_bp(i); + if (brk->bvr == pc) { + g_array_remove_index(hw_breakpoints, i); + return 0; + } + } + return -ENOENT; +} + +/** + * insert_hw_watchpoint() + * @addr: address of watch point + * @len: size of area + * @type: type of watch point + * + * See ARM ARM D2.10. As with the breakpoints we can do some advanced + * stuff if we want to. The watch points can be linked with the break + * points above to make them context aware. However for simplicity + * currently we only deal with simple read/write watch points. + * + * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers + * + * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0 + * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ + * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E | + * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ + * + * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes)) + * WT: 0 - unlinked, 1 - linked (not currently used) + * LBN: Linked BP number (not currently used) + * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11) + * BAS: Byte Address Select + * LSC: Load/Store control (01: load, 10: store, 11: both) + * E: Enable + * + * The bottom 2 bits of the value register are masked. 
Therefore to + * break on any sizes smaller than an unaligned word you need to set + * MASK=0, BAS=bit per byte in question. For larger regions (^2) you + * need to ensure you mask the address as required and set BAS=0xff + */ + +int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) +{ + HWWatchpoint wp = { + .wcr = R_DBGWCR_E_MASK, /* E=1, enable */ + .wvr = addr & (~0x7ULL), + .details = { .vaddr = addr, .len = len } + }; + + if (cur_hw_wps >= max_hw_wps) { + return -ENOBUFS; + } + + /* + * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state, + * valid whether EL3 is implemented or not + */ + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, PAC, 3); + + switch (type) { + case GDB_WATCHPOINT_READ: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 1); + wp.details.flags = BP_MEM_READ; + break; + case GDB_WATCHPOINT_WRITE: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 2); + wp.details.flags = BP_MEM_WRITE; + break; + case GDB_WATCHPOINT_ACCESS: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 3); + wp.details.flags = BP_MEM_ACCESS; + break; + default: + g_assert_not_reached(); + break; + } + if (len <= 8) { + /* we align the address and set the bits in BAS */ + int off = addr & 0x7; + int bas = (1 << len) - 1; + + wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas); + } else { + /* For ranges above 8 bytes we need to be a power of 2 */ + if (is_power_of_2(len)) { + int bits = ctz64(len); + + wp.wvr &= ~((1 << bits) - 1); + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, MASK, bits); + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, BAS, 0xff); + } else { + return -ENOBUFS; + } + } + + g_array_append_val(hw_watchpoints, wp); + return 0; +} + +bool check_watchpoint_in_range(int i, target_ulong addr) +{ + HWWatchpoint *wp = get_hw_wp(i); + uint64_t addr_top, addr_bottom = wp->wvr; + int bas = extract32(wp->wcr, 5, 8); + int mask = extract32(wp->wcr, 24, 4); + + if (mask) { + addr_top = addr_bottom + (1 << mask); + } else { + /* + * BAS must be contiguous but can offset against the base + * address in DBGWVR + */ + addr_bottom = addr_bottom + ctz32(bas); + addr_top = addr_bottom + clo32(bas); + } + + if (addr >= addr_bottom && addr <= addr_top) { + return true; + } + + return false; +} + +/** + * delete_hw_watchpoint() + * @addr: address of breakpoint + * + * Delete a breakpoint and shuffle any above down + */ + +int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) +{ + int i; + for (i = 0; i < cur_hw_wps; i++) { + if (check_watchpoint_in_range(i, addr)) { + g_array_remove_index(hw_watchpoints, i); + return 0; + } + } + return -ENOENT; +} + +bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) +{ + int i; + + for (i = 0; i < cur_hw_bps; i++) { + HWBreakpoint *bp = get_hw_bp(i); + if (bp->bvr == pc) { + return true; + } + } + return false; +} + +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) +{ + int i; + + for (i = 0; i < cur_hw_wps; i++) { + if (check_watchpoint_in_range(i, addr)) { + return &get_hw_wp(i)->details; + } + } + return NULL; +} diff --git a/target/arm/internals.h b/target/arm/internals.h index c869d18c38..e3029bdc37 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1242,7 +1242,8 @@ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) -FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9) /* size - 1 */ +FIELD(MTEDESC, ALIGN, 9, 3) +FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t 
desc, uint64_t ptr, uintptr_t ra); @@ -1447,4 +1448,54 @@ static inline bool arm_fgt_active(CPUARMState *env, int el) } void assert_hflags_rebuild_correctly(CPUARMState *env); + +/* + * Although the ARM implementation of hardware assisted debugging + * allows for different breakpoints per-core, the current GDB + * interface treats them as a global pool of registers (which seems to + * be the case for x86, ppc and s390). As a result we store one copy + * of registers which is used for all active cores. + * + * Write access is serialised by virtue of the GDB protocol which + * updates things. Read access (i.e. when the values are copied to the + * vCPU) is also gated by GDB's run control. + * + * This is not unreasonable as most of the time debugging kernels you + * never know which core will eventually execute your function. + */ + +typedef struct { + uint64_t bcr; + uint64_t bvr; +} HWBreakpoint; + +/* + * The watchpoint registers can cover more area than the requested + * watchpoint so we need to store the additional information + * somewhere. We also need to supply a CPUWatchpoint to the GDB stub + * when the watchpoint is hit. + */ +typedef struct { + uint64_t wcr; + uint64_t wvr; + CPUWatchpoint details; +} HWWatchpoint; + +/* Maximum and current break/watch point counts */ +extern int max_hw_bps, max_hw_wps; +extern GArray *hw_breakpoints, *hw_watchpoints; + +#define cur_hw_wps (hw_watchpoints->len) +#define cur_hw_bps (hw_breakpoints->len) +#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) +#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) + +bool find_hw_breakpoint(CPUState *cpu, target_ulong pc); +int insert_hw_breakpoint(target_ulong pc); +int delete_hw_breakpoint(target_ulong pc); + +bool check_watchpoint_in_range(int i, target_ulong addr); +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); +int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); +int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); #endif diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 810db33ccb..94bbd9661f 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -34,46 +34,6 @@ static bool have_guest_debug; -/* - * Although the ARM implementation of hardware assisted debugging - * allows for different breakpoints per-core, the current GDB - * interface treats them as a global pool of registers (which seems to - * be the case for x86, ppc and s390). As a result we store one copy - * of registers which is used for all active cores. - * - * Write access is serialised by virtue of the GDB protocol which - * updates things. Read access (i.e. when the values are copied to the - * vCPU) is also gated by GDB's run control. - * - * This is not unreasonable as most of the time debugging kernels you - * never know which core will eventually execute your function. - */ - -typedef struct { - uint64_t bcr; - uint64_t bvr; -} HWBreakpoint; - -/* The watchpoint registers can cover more area than the requested - * watchpoint so we need to store the additional information - * somewhere. We also need to supply a CPUWatchpoint to the GDB stub - * when the watchpoint is hit. 
- */ -typedef struct { - uint64_t wcr; - uint64_t wvr; - CPUWatchpoint details; -} HWWatchpoint; - -/* Maximum and current break/watch point counts */ -int max_hw_bps, max_hw_wps; -GArray *hw_breakpoints, *hw_watchpoints; - -#define cur_hw_wps (hw_watchpoints->len) -#define cur_hw_bps (hw_breakpoints->len) -#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) -#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) - void kvm_arm_init_debug(KVMState *s) { have_guest_debug = kvm_check_extension(s, @@ -89,217 +49,6 @@ void kvm_arm_init_debug(KVMState *s) return; } -/** - * insert_hw_breakpoint() - * @addr: address of breakpoint - * - * See ARM ARM D2.9.1 for details but here we are only going to create - * simple un-linked breakpoints (i.e. we don't chain breakpoints - * together to match address and context or vmid). The hardware is - * capable of fancier matching but that will require exposing that - * fanciness to GDB's interface - * - * DBGBCR<n>_EL1, Debug Breakpoint Control Registers - * - * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0 - * +------+------+-------+-----+----+------+-----+------+-----+---+ - * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E | - * +------+------+-------+-----+----+------+-----+------+-----+---+ - * - * BT: Breakpoint type (0 = unlinked address match) - * LBN: Linked BP number (0 = unused) - * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12) - * BAS: Byte Address Select (RES1 for AArch64) - * E: Enable bit - * - * DBGBVR<n>_EL1, Debug Breakpoint Value Registers - * - * 63 53 52 49 48 2 1 0 - * +------+-----------+----------+-----+ - * | RESS | VA[52:49] | VA[48:2] | 0 0 | - * +------+-----------+----------+-----+ - * - * Depending on the addressing mode bits the top bits of the register - * are a sign extension of the highest applicable VA bit. Some - * versions of GDB don't do it correctly so we ensure they are correct - * here so future PC comparisons will work properly. - */ - -static int insert_hw_breakpoint(target_ulong addr) -{ - HWBreakpoint brk = { - .bcr = 0x1, /* BCR E=1, enable */ - .bvr = sextract64(addr, 0, 53) - }; - - if (cur_hw_bps >= max_hw_bps) { - return -ENOBUFS; - } - - brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */ - brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */ - - g_array_append_val(hw_breakpoints, brk); - - return 0; -} - -/** - * delete_hw_breakpoint() - * @pc: address of breakpoint - * - * Delete a breakpoint and shuffle any above down - */ - -static int delete_hw_breakpoint(target_ulong pc) -{ - int i; - for (i = 0; i < hw_breakpoints->len; i++) { - HWBreakpoint *brk = get_hw_bp(i); - if (brk->bvr == pc) { - g_array_remove_index(hw_breakpoints, i); - return 0; - } - } - return -ENOENT; -} - -/** - * insert_hw_watchpoint() - * @addr: address of watch point - * @len: size of area - * @type: type of watch point - * - * See ARM ARM D2.10. As with the breakpoints we can do some advanced - * stuff if we want to. The watch points can be linked with the break - * points above to make them context aware. However for simplicity - * currently we only deal with simple read/write watch points. 
- * - * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers - * - * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0 - * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ - * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E | - * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ - * - * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes)) - * WT: 0 - unlinked, 1 - linked (not currently used) - * LBN: Linked BP number (not currently used) - * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11) - * BAS: Byte Address Select - * LSC: Load/Store control (01: load, 10: store, 11: both) - * E: Enable - * - * The bottom 2 bits of the value register are masked. Therefore to - * break on any sizes smaller than an unaligned word you need to set - * MASK=0, BAS=bit per byte in question. For larger regions (^2) you - * need to ensure you mask the address as required and set BAS=0xff - */ - -static int insert_hw_watchpoint(target_ulong addr, - target_ulong len, int type) -{ - HWWatchpoint wp = { - .wcr = R_DBGWCR_E_MASK, /* E=1, enable */ - .wvr = addr & (~0x7ULL), - .details = { .vaddr = addr, .len = len } - }; - - if (cur_hw_wps >= max_hw_wps) { - return -ENOBUFS; - } - - /* - * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state, - * valid whether EL3 is implemented or not - */ - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, PAC, 3); - - switch (type) { - case GDB_WATCHPOINT_READ: - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 1); - wp.details.flags = BP_MEM_READ; - break; - case GDB_WATCHPOINT_WRITE: - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 2); - wp.details.flags = BP_MEM_WRITE; - break; - case GDB_WATCHPOINT_ACCESS: - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 3); - wp.details.flags = BP_MEM_ACCESS; - break; - default: - g_assert_not_reached(); - break; - } - if (len <= 8) { - /* we align the address and set the bits in BAS */ - int off = addr & 0x7; - int bas = (1 << len) - 1; - - wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas); - } else { - /* For ranges above 8 bytes we need to be a power of 2 */ - if (is_power_of_2(len)) { - int bits = ctz64(len); - - wp.wvr &= ~((1 << bits) - 1); - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, MASK, bits); - wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, BAS, 0xff); - } else { - return -ENOBUFS; - } - } - - g_array_append_val(hw_watchpoints, wp); - return 0; -} - - -static bool check_watchpoint_in_range(int i, target_ulong addr) -{ - HWWatchpoint *wp = get_hw_wp(i); - uint64_t addr_top, addr_bottom = wp->wvr; - int bas = extract32(wp->wcr, 5, 8); - int mask = extract32(wp->wcr, 24, 4); - - if (mask) { - addr_top = addr_bottom + (1 << mask); - } else { - /* BAS must be contiguous but can offset against the base - * address in DBGWVR */ - addr_bottom = addr_bottom + ctz32(bas); - addr_top = addr_bottom + clo32(bas); - } - - if (addr >= addr_bottom && addr <= addr_top) { - return true; - } - - return false; -} - -/** - * delete_hw_watchpoint() - * @addr: address of breakpoint - * - * Delete a breakpoint and shuffle any above down - */ - -static int delete_hw_watchpoint(target_ulong addr, - target_ulong len, int type) -{ - int i; - for (i = 0; i < cur_hw_wps; i++) { - if (check_watchpoint_in_range(i, addr)) { - g_array_remove_index(hw_watchpoints, i); - return 0; - } - } - return -ENOENT; -} - - int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type) { @@ -364,31 +113,6 @@ bool kvm_arm_hw_debug_active(CPUState *cs) return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); } -static bool 
find_hw_breakpoint(CPUState *cpu, target_ulong pc) -{ - int i; - - for (i = 0; i < cur_hw_bps; i++) { - HWBreakpoint *bp = get_hw_bp(i); - if (bp->bvr == pc) { - return true; - } - } - return false; -} - -static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) -{ - int i; - - for (i = 0; i < cur_hw_wps; i++) { - if (check_watchpoint_in_range(i, addr)) { - return &get_hw_wp(i)->details; - } - } - return NULL; -} - static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, const char *name) { diff --git a/target/arm/meson.build b/target/arm/meson.build index 359a649eaf..011e8ca113 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -8,7 +8,8 @@ arm_ss.add(files( )) arm_ss.add(zlib) -arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) +arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) +arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 886674a443..2976f94ae4 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -644,6 +644,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */ t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1); /* FEAT_LVA */ t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1); /* FEAT_TTST */ + t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1); /* FEAT_LSE2 */ t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1); /* FEAT_IDST */ t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */ t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */ diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index c3edf163be..1c9370f07b 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -952,3 +952,10 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) memset(mem, 0, blocklen); } + +void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, + uint32_t access_type, uint32_t mmu_idx) +{ + arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, + mmu_idx, GETPC()); +} diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index ff56807247..3d5957c11f 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -110,3 +110,6 @@ DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) + +DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG, + noreturn, env, i64, i32, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index b2ccd77cff..616c5fa723 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -248,6 +248,12 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_lse2, env_archcpu(env))) { + if (sctlr & SCTLR_nAA) { + DP_TBFLAG_A64(flags, NAA, 1); + } + } + /* Compute the condition for using AccType_UNPRIV for LDTR et al. 
*/ if (!(env->pstate & PSTATE_UAO)) { switch (mmu_idx) { diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index a4f3f92bc0..9c64def081 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -785,6 +785,24 @@ uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr) { + /* + * R_XCHFJ: Alignment check not caused by memory type is priority 1, + * higher than any translation fault. When MTE is disabled, tcg + * performs the alignment check during the code generated for the + * memory access. With MTE enabled, we must check this here before + * raising any translation fault in allocation_tag_mem. + */ + unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN); + if (unlikely(align)) { + align = (1u << align) - 1; + if (unlikely(ptr & align)) { + int idx = FIELD_EX32(desc, MTEDESC, MIDX); + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD; + arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC()); + } + } + return mte_check(env, desc, ptr, GETPC()); } diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index d9800337cf..aa93f37e21 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -253,7 +253,7 @@ static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, */ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, - int log2_size, bool is_unpriv, + MemOp memop, bool is_unpriv, int core_idx) { if (tag_checked && s->mte_active[is_unpriv]) { @@ -264,7 +264,8 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); ret = tcg_temp_new_i64(); gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr); @@ -275,9 +276,9 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, } TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_size) + bool tag_checked, MemOp memop) { - return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, false, get_mem_index(s)); } @@ -285,7 +286,7 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, * For MTE, check multiple logical sequential accesses. 
*/ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int size) + bool tag_checked, int total_size, MemOp single_mop) { if (tag_checked && s->mte_active[0]) { TCGv_i64 ret; @@ -295,7 +296,8 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); ret = tcg_temp_new_i64(); gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr); @@ -305,6 +307,89 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, return clean_data_tbi(s, addr); } +/* + * Generate the special alignment check that applies to AccType_ATOMIC + * and AccType_ORDERED insns under FEAT_LSE2: the access need not be + * naturally aligned, but it must not cross a 16-byte boundary. + * See AArch64.CheckAlignment(). + */ +static void check_lse2_align(DisasContext *s, int rn, int imm, + bool is_write, MemOp mop) +{ + TCGv_i32 tmp; + TCGv_i64 addr; + TCGLabel *over_label; + MMUAccessType type; + int mmu_idx; + + tmp = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); + tcg_gen_addi_i32(tmp, tmp, imm & 15); + tcg_gen_andi_i32(tmp, tmp, 15); + tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); + + over_label = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); + + addr = tcg_temp_new_i64(); + tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); + + type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, + mmu_idx = get_mem_index(s); + gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type), + tcg_constant_i32(mmu_idx)); + + gen_set_label(over_label); + +} + +/* Handle the alignment check for AccType_ATOMIC instructions. */ +static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) +{ + MemOp size = mop & MO_SIZE; + + if (size == MO_8) { + return mop; + } + + /* + * If size == MO_128, this is a LDXP, and the operation is single-copy + * atomic for each doubleword, not the entire quadword; it still must + * be quadword aligned. + */ + if (size == MO_128) { + return finalize_memop_atom(s, MO_128 | MO_ALIGN, + MO_ATOM_IFALIGN_PAIR); + } + if (dc_isar_feature(aa64_lse2, s)) { + check_lse2_align(s, rn, 0, true, mop); + } else { + mop |= MO_ALIGN; + } + return finalize_memop(s, mop); +} + +/* Handle the alignment check for AccType_ORDERED instructions. 
*/ +static MemOp check_ordered_align(DisasContext *s, int rn, int imm, + bool is_write, MemOp mop) +{ + MemOp size = mop & MO_SIZE; + + if (size == MO_8) { + return mop; + } + if (size == MO_128) { + return finalize_memop_atom(s, MO_128 | MO_ALIGN, + MO_ATOM_IFALIGN_PAIR); + } + if (!dc_isar_feature(aa64_lse2, s)) { + mop |= MO_ALIGN; + } else if (!s->naa) { + check_lse2_align(s, rn, imm, is_write, mop); + } + return finalize_memop(s, mop); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -838,7 +923,6 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - memop = finalize_memop(s, memop); tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); if (iss_valid) { @@ -873,7 +957,6 @@ static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - memop = finalize_memop(s, memop); tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); if (extend && (memop & MO_SIGN)) { @@ -907,59 +990,44 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, /* * Store from FP register to memory */ -static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) +static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) { /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmplo = tcg_temp_new_i64(); - MemOp mop; tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64)); - if (size < 4) { - mop = finalize_memop(s, size); + if ((mop & MO_SIZE) < MO_128) { tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(); TCGv_i64 tmphi = tcg_temp_new_i64(); + TCGv_i128 t16 = tcg_temp_new_i128(); tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx)); + tcg_gen_concat_i64_i128(t16, tmplo, tmphi); - mop = s->be_data | MO_UQ; - tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); + tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); } } /* * Load from memory to FP register */ -static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) +static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) { /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi = NULL; - MemOp mop; - if (size < 4) { - mop = finalize_memop(s, size); + if ((mop & MO_SIZE) < MO_128) { tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr; + TCGv_i128 t16 = tcg_temp_new_i128(); - tmphi = tcg_temp_new_i64(); - tcg_hiaddr = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); - mop = s->be_data | MO_UQ; - tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(be ? 
tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); + tmphi = tcg_temp_new_i64(); + tcg_gen_extr_i128_i64(tmplo, tmphi, t16); } tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); @@ -2382,19 +2450,22 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * races in multi-threaded linux-user and when MTTCG softmmu is * enabled. */ -static void gen_load_exclusive(DisasContext *s, int rt, int rt2, - TCGv_i64 addr, int size, bool is_pair) +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, + int size, bool is_pair) { int idx = get_mem_index(s); - MemOp memop = s->be_data; + TCGv_i64 dirty_addr, clean_addr; + MemOp memop = check_atomic_align(s, rn, size + is_pair); + + s->is_ldex = true; + dirty_addr = cpu_reg_sp(s, rn); + clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); g_assert(size <= 3); if (is_pair) { g_assert(size >= 2); if (size == 2) { - /* The pair must be single-copy atomic for the doubleword. */ - memop |= MO_64 | MO_ALIGN; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); if (s->be_data == MO_LE) { tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); @@ -2403,29 +2474,29 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); } } else { - /* The pair must be single-copy atomic for *each* doubleword, not - the entire quadword, however it must be quadword aligned. */ - memop |= MO_64; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, - memop | MO_ALIGN_16); + TCGv_i128 t16 = tcg_temp_new_i128(); - TCGv_i64 addr2 = tcg_temp_new_i64(); - tcg_gen_addi_i64(addr2, addr, 8); - tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); + tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(cpu_exclusive_val, + cpu_exclusive_high, t16); + } else { + tcg_gen_extr_i128_i64(cpu_exclusive_high, + cpu_exclusive_val, t16); + } tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); } } else { - memop |= size | MO_ALIGN; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); } - tcg_gen_mov_i64(cpu_exclusive_addr, addr); + tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); } static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) + int rn, int size, int is_pair) { /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] * && (!is_pair || env->exclusive_high == [addr + datasize])) { @@ -2441,9 +2512,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, */ TCGLabel *fail_label = gen_new_label(); TCGLabel *done_label = gen_new_label(); - TCGv_i64 tmp; + TCGv_i64 tmp, clean_addr; + MemOp memop; + + /* + * FIXME: We are out of spec here. We have recorded only the address + * from load_exclusive, not the entire range, and we assume that the + * size of the access on both sides match. The architecture allows the + * store to be smaller than the load, so long as the stored bytes are + * within the range recorded by the load. + */ + + /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). 
*/ + clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); + /* + * The write, and any associated faults, only happen if the virtual + * and physical addresses pass the exclusive monitor check. These + * faults are exceedingly unlikely, because normally the guest uses + * the exact same address register for the load_exclusive, and we + * would have recognized these faults there. + * + * It is possible to trigger an alignment fault pre-LSE2, e.g. with an + * unaligned 4-byte write within the range of an aligned 8-byte load. + * With LSE2, the store would need to cross a 16-byte boundary when the + * load did not, which would mean the store is outside the range + * recorded for the monitor, which would have failed a corrected monitor + * check above. For now, we assume no size change and retain the + * MO_ALIGN to let tcg know what we checked in the load_exclusive. + * + * It is possible to trigger an MTE fault, by performing the load with + * a virtual address with a valid tag and performing the store with the + * same virtual address and a different invalid tag. + */ + memop = size + is_pair; + if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { + memop |= MO_ALIGN; + } + memop = finalize_memop(s, memop); + gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); tmp = tcg_temp_new_i64(); if (is_pair) { @@ -2455,8 +2563,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, tmp, - get_mem_index(s), - MO_64 | MO_ALIGN | s->be_data); + get_mem_index(s), memop); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); } else { TCGv_i128 t16 = tcg_temp_new_i128(); @@ -2474,8 +2581,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, - get_mem_index(s), - MO_128 | MO_ALIGN | s->be_data); + get_mem_index(s), memop); a = tcg_temp_new_i64(); b = tcg_temp_new_i64(); @@ -2493,8 +2599,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, - cpu_reg(s, rt), get_mem_index(s), - size | MO_ALIGN | s->be_data); + cpu_reg(s, rt), get_mem_index(s), memop); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); } tcg_gen_mov_i64(cpu_reg(s, rd), tmp); @@ -2513,13 +2618,15 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, TCGv_i64 tcg_rt = cpu_reg(s, rt); int memidx = get_mem_index(s); TCGv_i64 clean_addr; + MemOp memop; if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); - tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, - size | MO_ALIGN | s->be_data); + memop = check_atomic_align(s, rn, size); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); + tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, + memidx, memop); } static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, @@ -2531,13 +2638,15 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, TCGv_i64 t2 = cpu_reg(s, rt + 1); TCGv_i64 clean_addr; int memidx = get_mem_index(s); + MemOp memop; if (rn == 31) { gen_check_sp_alignment(s); } /* This is a single atomic access, despite the "pair". 
*/ - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); + memop = check_atomic_align(s, rn, size + 1); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(); @@ -2551,8 +2660,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, tcg_gen_concat32_i64(cmp, s2, s1); } - tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, - MO_64 | MO_ALIGN | s->be_data); + tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); if (s->be_data == MO_LE) { tcg_gen_extr32_i64(s1, s2, cmp); @@ -2571,8 +2679,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, tcg_gen_concat_i64_i128(cmp, s2, s1); } - tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, - MO_128 | MO_ALIGN | s->be_data); + tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); if (s->be_data == MO_LE) { tcg_gen_extr_i128_i64(s1, s2, cmp); @@ -2621,6 +2728,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; int size = extract32(insn, 30, 2); TCGv_i64 clean_addr; + MemOp memop; switch (o2_L_o1_o0) { case 0x0: /* STXR */ @@ -2631,9 +2739,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); + gen_store_exclusive(s, rs, rt, rt2, rn, size, false); return; case 0x4: /* LDXR */ @@ -2641,10 +2747,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - s->is_ldex = true; - gen_load_exclusive(s, rt, rt2, clean_addr, size, false); + gen_load_exclusive(s, rt, rt2, rn, size, false); if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } @@ -2662,10 +2765,10 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + memop = check_ordered_align(s, rn, 0, true, size); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, + true, rn != 31, memop); + do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2680,10 +2783,10 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } + memop = check_ordered_align(s, rn, 0, false, size); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, + false, rn != 31, memop); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -2696,9 +2799,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); + gen_store_exclusive(s, rs, rt, rt2, rn, size, true); return; } if (rt2 == 31 @@ -2715,10 +2816,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { 
gen_check_sp_alignment(s); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - s->is_ldex = true; - gen_load_exclusive(s, rt, rt2, clean_addr, size, true); + gen_load_exclusive(s, rt, rt2, rn, size, true); if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } @@ -2768,6 +2866,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) bool is_signed = false; int size = 2; TCGv_i64 tcg_rt, clean_addr; + MemOp memop; if (is_vector) { if (opc == 3) { @@ -2778,6 +2877,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } + memop = finalize_memop_asimd(s, size); } else { if (opc == 3) { /* PRFM (literal) : prefetch */ @@ -2785,20 +2885,20 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) } size = 2 + extract32(opc, 0, 1); is_signed = extract32(opc, 1, 1); + memop = finalize_memop(s, size + is_signed * MO_SIGN); } tcg_rt = cpu_reg(s, rt); clean_addr = tcg_temp_new_i64(); gen_pc_plus_diff(s, clean_addr, imm); + if (is_vector) { - do_fp_ld(s, rt, clean_addr, size); + do_fp_ld(s, rt, clean_addr, memop); } else { /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, - false, true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false); } } @@ -2840,14 +2940,12 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_vector = extract32(insn, 26, 1); bool is_load = extract32(insn, 22, 1); int opc = extract32(insn, 30, 2); - bool is_signed = false; bool postindex = false; bool wback = false; bool set_tag = false; - TCGv_i64 clean_addr, dirty_addr; - + MemOp mop; int size; if (opc == 3) { @@ -2930,44 +3028,94 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) } } + if (is_vector) { + mop = finalize_memop_asimd(s, size); + } else { + mop = finalize_memop(s, size); + } clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, - (wback || rn != 31) && !set_tag, 2 << size); + (wback || rn != 31) && !set_tag, + 2 << size, mop); if (is_vector) { + /* LSE2 does not merge FP pairs; leave these as separate operations. */ if (is_load) { - do_fp_ld(s, rt, clean_addr, size); + do_fp_ld(s, rt, clean_addr, mop); } else { - do_fp_st(s, rt, clean_addr, size); + do_fp_st(s, rt, clean_addr, mop); } tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); if (is_load) { - do_fp_ld(s, rt2, clean_addr, size); + do_fp_ld(s, rt2, clean_addr, mop); } else { - do_fp_st(s, rt2, clean_addr, size); + do_fp_st(s, rt2, clean_addr, mop); } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); + /* + * We built mop above for the single logical access -- rebuild it + * now for the paired operation. + * + * With LSE2, non-sign-extending pairs are treated atomically if + * aligned, and if unaligned one of the pair will be completely + * within a 16-byte block and that element will be atomic. + * Otherwise each element is separately atomic. + * In all cases, issue one operation with the correct atomicity. + * + * This treats sign-extending loads like zero-extending loads, + * since that reuses the most code below. + */ + mop = size + 1; + if (s->align_mem) { + mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8); + } + mop = finalize_memop_pair(s, mop); + if (is_load) { - TCGv_i64 tmp = tcg_temp_new_i64(); + if (size == 2) { + int o2 = s->be_data == MO_LE ? 32 : 0; + int o1 = o2 ^ 32; - /* Do not modify tcg_rt before recognizing any exception - * from the second load. 
- */ - do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, - false, false, 0, false, false); - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, - false, false, 0, false, false); + tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); + if (is_signed) { + tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); + } else { + tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); + } + } else { + TCGv_i128 tmp = tcg_temp_new_i128(); - tcg_gen_mov_i64(tcg_rt, tmp); + tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); + } else { + tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); + } + } } else { - do_gpr_st(s, tcg_rt, clean_addr, size, - false, 0, false, false); - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_st(s, tcg_rt2, clean_addr, size, - false, 0, false, false); + if (size == 2) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + if (s->be_data == MO_LE) { + tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); + } + tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); + } else { + TCGv_i128 tmp = tcg_temp_new_i128(); + + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); + } + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); + } } } @@ -3012,7 +3160,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool post_index; bool writeback; int memidx; - + MemOp memop; TCGv_i64 clean_addr, dirty_addr; if (is_vector) { @@ -3025,6 +3173,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!fp_access_check(s)) { return; } + memop = finalize_memop_asimd(s, size); } else { if (size == 3 && opc == 2) { /* PRFM - prefetch */ @@ -3039,8 +3188,9 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, return; } is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); + is_signed = !is_store && extract32(opc, 1, 1); is_extended = (size < 3) && extract32(opc, 0, 1); + memop = finalize_memop(s, size + is_signed * MO_SIGN); } switch (idx) { @@ -3073,25 +3223,26 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } memidx = is_unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, writeback || rn != 31, size, is_unpriv, memidx); if (is_vector) { if (is_store) { - do_fp_st(s, rt, clean_addr, size); + do_fp_st(s, rt, clean_addr, memop); } else { - do_fp_ld(s, rt, clean_addr, size); + do_fp_ld(s, rt, clean_addr, memop); } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, + do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx, iss_valid, rt, iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop, is_extended, memidx, iss_valid, rt, iss_sf, false); } @@ -3140,8 +3291,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, bool is_signed = false; bool is_store = false; bool is_extended = false; - TCGv_i64 tcg_rm, clean_addr, dirty_addr; + MemOp memop; if (extract32(opt, 1, 1) == 0) { unallocated_encoding(s); @@ -3168,7 +3319,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, return; } is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); + is_signed = !is_store && extract32(opc, 1, 1); is_extended = (size < 3) && extract32(opc, 0, 1); } @@ -3181,22 +3332,25 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); + + memop = finalize_memop(s, size + is_signed * MO_SIGN); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop); if (is_vector) { if (is_store) { - do_fp_st(s, rt, clean_addr, size); + do_fp_st(s, rt, clean_addr, memop); } else { - do_fp_ld(s, rt, clean_addr, size); + do_fp_ld(s, rt, clean_addr, memop); } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); + if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, + do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + do_gpr_ld(s, tcg_rt, clean_addr, memop, is_extended, true, rt, iss_sf, false); } } @@ -3228,12 +3382,11 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, int rn = extract32(insn, 5, 5); unsigned int imm12 = extract32(insn, 10, 12); unsigned int offset; - TCGv_i64 clean_addr, dirty_addr; - bool is_store; bool is_signed = false; bool is_extended = false; + MemOp memop; if (is_vector) { size |= (opc & 2) << 1; @@ -3255,7 +3408,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, return; } is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); + is_signed = !is_store && extract32(opc, 1, 1); is_extended = (size < 3) && extract32(opc, 0, 1); } @@ -3265,22 +3418,23 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); + + memop = finalize_memop(s, size + is_signed * MO_SIGN); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop); if (is_vector) { if (is_store) { - do_fp_st(s, rt, clean_addr, size); + do_fp_st(s, rt, clean_addr, memop); } else { - do_fp_ld(s, rt, clean_addr, size); + do_fp_ld(s, rt, clean_addr, memop); } } else { 
TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, + do_gpr_ld(s, tcg_rt, clean_addr, memop, is_extended, true, rt, iss_sf, false); } } @@ -3310,7 +3464,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, bool a = extract32(insn, 23, 1); TCGv_i64 tcg_rs, tcg_rt, clean_addr; AtomicThreeOpFn *fn = NULL; - MemOp mop = s->be_data | size | MO_ALIGN; + MemOp mop = size; if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); @@ -3361,7 +3515,9 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); + + mop = check_atomic_align(s, rn, mop); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop); if (o3_opc == 014) { /* @@ -3371,7 +3527,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); return; @@ -3417,6 +3573,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, bool use_key_a = !extract32(insn, 23, 1); int offset; TCGv_i64 clean_addr, dirty_addr, tcg_rt; + MemOp memop; if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { unallocated_encoding(s); @@ -3443,12 +3600,14 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, offset = sextract32(offset << size, 0, 10 + size); tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + memop = finalize_memop(s, size); + /* Note that "clean" and "dirty" here refer to TBI not PAC. */ clean_addr = gen_mte_check1(s, dirty_addr, false, - is_wback || rn != 31, size); + is_wback || rn != 31, memop); tcg_rt = cpu_reg(s, rt); - do_gpr_ld(s, tcg_rt, clean_addr, size, + do_gpr_ld(s, tcg_rt, clean_addr, memop, /* extend */ false, /* iss_valid */ !is_wback, /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); @@ -3482,16 +3641,13 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) bool is_store = false; bool extend = false; bool iss_sf; - MemOp mop; + MemOp mop = size; if (!dc_isar_feature(aa64_rcpc_8_4, s)) { unallocated_encoding(s); return; } - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - mop = size | MO_ALIGN; - switch (opc) { case 0: /* STLURB */ is_store = true; @@ -3523,6 +3679,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } + mop = check_ordered_align(s, rn, offset, is_store, mop); + dirty_addr = read_cpu_reg_sp(s, rn, 1); tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); clean_addr = clean_data_tbi(s, dirty_addr); @@ -3689,7 +3847,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) * promote consecutive little-endian elements below. 
*/ clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, - total); + total, finalize_memop(s, size)); /* * Consecutive little-endian elements from a single register @@ -3847,10 +4005,11 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, - total); mop = finalize_memop(s, scale); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + total, mop); + tcg_ebytes = tcg_constant_i64(1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { @@ -4062,15 +4221,18 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) if (is_zero) { TCGv_i64 clean_addr = clean_data_tbi(s, addr); - TCGv_i64 tcg_zero = tcg_constant_i64(0); + TCGv_i64 zero64 = tcg_constant_i64(0); + TCGv_i128 zero128 = tcg_temp_new_i128(); int mem_index = get_mem_index(s); - int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); + + tcg_gen_concat_i64_i128(zero128, zero64, zero64); - tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, - MO_UQ | MO_ALIGN_16); - for (i = 8; i < n; i += 8) { - tcg_gen_addi_i64(clean_addr, clean_addr, 8); - tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ); + /* This is 1 or 2 atomic 16-byte operations. */ + tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); + if (is_pair) { + tcg_gen_addi_i64(clean_addr, clean_addr, 16); + tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); } } @@ -14087,6 +14249,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); + dc->naa = EX_TBFLAG_A64(tb_flags, NAA); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -14098,6 +14261,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, tcg_debug_assert(dc->tbid & 1); #endif + dc->lse2 = dc_isar_feature(aa64_lse2, dc); + /* Single step state. 
The code-generation logic here is: * SS_ACTIVE == 0: * generate code with no special handling for single-stepping (except diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 0576c4ea12..b55dc435fc 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -49,9 +49,9 @@ static inline bool sme_smza_enabled_check(DisasContext *s) TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_size); + bool tag_checked, MemOp memop); TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int size); + bool tag_checked, int total_size, MemOp memop); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index d9d5810dde..ff050626e6 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4167,15 +4167,16 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, int len, int rn, int imm) { - int len_align = QEMU_ALIGN_DOWN(len, 8); - int len_remain = len % 8; - int nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 16); + int len_remain = len % 16; + int nparts = len / 16 + ctpop8(len_remain); int midx = get_mem_index(s); TCGv_i64 dirty_addr, clean_addr, t0, t1; + TCGv_i128 t16; dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); /* * Note that unpredicated load/store of vector/predicate registers @@ -4188,10 +4189,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, int i; t0 = tcg_temp_new_i64(); - for (i = 0; i < len_align; i += 8) { - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); + t1 = tcg_temp_new_i64(); + t16 = tcg_temp_new_i128(); + + for (i = 0; i < len_align; i += 16) { + tcg_gen_qemu_ld_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_extr_i128_i64(t0, t1, t16); tcg_gen_st_i64(t0, base, vofs + i); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tcg_gen_st_i64(t1, base, vofs + i + 8); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); } } else { TCGLabel *loop = gen_new_label(); @@ -4200,14 +4207,21 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_movi_ptr(i, 0); gen_set_label(loop); - t0 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + t16 = tcg_temp_new_i128(); + tcg_gen_qemu_ld_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); tp = tcg_temp_new_ptr(); tcg_gen_add_ptr(tp, base, i); - tcg_gen_addi_ptr(i, i, 8); + tcg_gen_addi_ptr(i, i, 16); + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_extr_i128_i64(t0, t1, t16); + tcg_gen_st_i64(t0, tp, vofs); + tcg_gen_st_i64(t1, tp, vofs + 8); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); } @@ -4216,6 +4230,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, * Predicate register loads can be any multiple of 2. * Note that we still store the entire 64-bit unit into cpu_env. 
*/ + if (len_remain >= 8) { + t0 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_st_i64(t0, base, vofs + len_align); + len_remain -= 8; + len_align += 8; + if (len_remain) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + } + } if (len_remain) { t0 = tcg_temp_new_i64(); switch (len_remain) { @@ -4223,14 +4247,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_ld_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain)); + MO_LE | ctz32(len_remain) | MO_ATOM_NONE); break; case 6: t1 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); tcg_gen_addi_i64(clean_addr, clean_addr, 4); - tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); + tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); tcg_gen_deposit_i64(t0, t0, t1, 32, 32); break; @@ -4245,15 +4269,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, int len, int rn, int imm) { - int len_align = QEMU_ALIGN_DOWN(len, 8); - int len_remain = len % 8; - int nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 16); + int len_remain = len % 16; + int nparts = len / 16 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 dirty_addr, clean_addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0, t1; + TCGv_i128 t16; dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4267,10 +4292,15 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, int i; t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t16 = tcg_temp_new_i128(); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(t0, base, vofs + i); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tcg_gen_ld_i64(t1, base, vofs + i + 8); + tcg_gen_concat_i64_i128(t16, t0, t1); + tcg_gen_qemu_st_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); } } else { TCGLabel *loop = gen_new_label(); @@ -4280,18 +4310,33 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, gen_set_label(loop); t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); tp = tcg_temp_new_ptr(); tcg_gen_add_ptr(tp, base, i); tcg_gen_ld_i64(t0, tp, vofs); - tcg_gen_addi_ptr(i, i, 8); + tcg_gen_ld_i64(t1, tp, vofs + 8); + tcg_gen_addi_ptr(i, i, 16); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + t16 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(t16, t0, t1); + + tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); } /* Predicate register stores can be any multiple of 2. 
*/ + if (len_remain >= 8) { + t0 = tcg_temp_new_i64(); + tcg_gen_st_i64(t0, base, vofs + len_align); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + len_remain -= 8; + len_align += 8; + if (len_remain) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + } + } if (len_remain) { t0 = tcg_temp_new_i64(); tcg_gen_ld_i64(t0, base, vofs + len_align); @@ -4301,14 +4346,14 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_st_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain)); + MO_LE | ctz32(len_remain) | MO_ATOM_NONE); break; case 6: - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); break; default: @@ -4964,6 +5009,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned msz = dtype_msz(a->dtype); TCGLabel *over; TCGv_i64 temp, clean_addr; + MemOp memop; if (!dc_isar_feature(aa64_sve, s)) { return false; @@ -4993,10 +5039,10 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(); tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); - clean_addr = gen_mte_check1(s, temp, false, true, msz); - tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), - finalize_memop(s, dtype_mop[a->dtype])); + memop = finalize_memop(s, dtype_mop[a->dtype]); + clean_addr = gen_mte_check1(s, temp, false, true, memop); + tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); /* Broadcast to *all* elements. */ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index a68d3c7f6d..13c88ba1b9 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -9168,6 +9168,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->sme_trap_nonstreaming = EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); } + dc->lse2 = false; /* applies only to aarch64 */ dc->cp_regs = cpu->cp_regs; dc->features = env->features; diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index 4d88197715..d1cacff0b2 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -90,6 +90,7 @@ typedef struct DisasContext { uint64_t features; /* CPU features bits */ bool aarch64; bool thumb; + bool lse2; /* Because unallocated encodings generate different exception syndrome * information from traps due to FP being disabled, we can't do a single * "is fp access disabled" check at a high level in the decode tree. @@ -141,6 +142,8 @@ typedef struct DisasContext { bool fgt_eret; /* True if fine-grained trap on SVC is enabled */ bool fgt_svc; + /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */ + bool naa; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. @@ -557,12 +560,13 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) } /** - * finalize_memop: + * finalize_memop_atom: * @s: DisasContext * @opc: size+sign+align of the memory operation + * @atom: atomicity of the memory operation * - * Build the complete MemOp for a memory operation, including alignment - * and endianness. + * Build the complete MemOp for a memory operation, including alignment, + * endianness, and atomicity. 
* * If (op & MO_AMASK) then the operation already contains the required * alignment, e.g. for AccType_ATOMIC. Otherwise, this an optionally @@ -572,12 +576,63 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) * and this is applied here. Note that there is no way to indicate that * no alignment should ever be enforced; this must be handled manually. */ -static inline MemOp finalize_memop(DisasContext *s, MemOp opc) +static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom) { if (s->align_mem && !(opc & MO_AMASK)) { opc |= MO_ALIGN; } - return opc | s->be_data; + return opc | atom | s->be_data; +} + +/** + * finalize_memop: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with default atomicity. + */ +static inline MemOp finalize_memop(DisasContext *s, MemOp opc) +{ + MemOp atom = s->lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN; + return finalize_memop_atom(s, opc, atom); +} + +/** + * finalize_memop_pair: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with atomicity for a pair. + * C.f. Pseudocode for Mem[], operand ispair. + */ +static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc) +{ + MemOp atom = s->lse2 ? MO_ATOM_WITHIN16_PAIR : MO_ATOM_IFALIGN_PAIR; + return finalize_memop_atom(s, opc, atom); +} + +/** + * finalize_memop_asimd: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD. + */ +static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc) +{ + /* + * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16, + * if IsAligned(8), the first case provides separate atomicity for + * the pair of 64-bit accesses. If !IsAligned(8), the middle cases + * do not apply, and we're left with the final case of no atomicity. + * Thus MO_ATOM_IFALIGN_PAIR. + * + * For other sizes, normal LSE2 rules apply. + */ + if ((opc & MO_SIZE) == MO_128) { + return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR); + } + return finalize_memop(s, opc); } /** diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 8d2248bb3f..f6775c942a 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -679,3 +679,36 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} + +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} + +int hvf_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type) +{ + return -ENOSYS; +} + +int hvf_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type) +{ + return -ENOSYS; +} + +void hvf_arch_remove_all_hw_breakpoints(void) +{ +} + +void hvf_arch_update_guest_debug(CPUState *cpu) +{ +} + +inline bool hvf_arch_supports_guest_debug(void) +{ + return false; +} |
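For readers skimming the series, the AArch64.CheckAlignment() rule that check_lse2_align() open-codes in the translator above can be stated in a few lines of plain C. This is an illustrative sketch only, not part of the patch; the function name lse2_access_allowed and the use of host integer types are assumptions made here for clarity.

#include <stdbool.h>
#include <stdint.h>

/*
 * FEAT_LSE2 relaxation for AccType_ATOMIC / AccType_ORDERED accesses:
 * the access need not be naturally aligned, but every byte must fall
 * within a single aligned 16-byte block.
 */
bool lse2_access_allowed(uint64_t addr, unsigned size_bytes)
{
    unsigned offset = addr & 15;       /* position within the 16-byte block */
    return offset + size_bytes <= 16;  /* last byte stays in the same block */
}

This mirrors the generated code in check_lse2_align(): it adds the low four address bits to the access size and raises the unaligned-access exception only when the sum exceeds 16.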
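Similarly, the new MTEDESC ALIGN field consumed in the mte_check path stores log2 of the required alignment, with 0 meaning "no check". Below is a minimal sketch of how the helper rebuilds and applies the byte mask; the name mte_align_fault is hypothetical and stands in for the inline logic added to HELPER(mte_check) above.

#include <stdbool.h>
#include <stdint.h>

/* Returns true if ptr violates an alignment of (1 << align_log2) bytes. */
bool mte_align_fault(uint64_t ptr, unsigned align_log2)
{
    if (align_log2 == 0) {
        return false;                          /* field value 0: nothing to check */
    }
    uint64_t mask = (UINT64_C(1) << align_log2) - 1;
    return (ptr & mask) != 0;                  /* any low bit set => unaligned */
}

In the patch this check runs before allocation_tag_mem(), so that, per rule R_XCHFJ, an alignment fault takes priority over any translation fault the tag lookup might raise.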