diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2023-07-24 11:34:01 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2023-07-24 11:34:01 +0100 |
commit | c5216b69545ec391c6099a0816fc537c49ad0063 (patch) | |
tree | 7cd3b0162d7574079f7a604bc15bcc8e6598be76 | |
parent | d1181d29370a4318a9f11ea92065bea6bb159f83 (diff) | |
parent | 32b120394c578bc824f1db4835b3bffbeca88fae (diff) |
Merge tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu into staging
accel/tcg: Zero-pad vaddr in tlb_debug output
accel/tcg: Fix type of 'last' for pageflags_{find,next}
accel/tcg: Fix sense of read-only probes in ldst_atomicity
accel/tcg: Take mmap_lock in load_atomic*_or_exit
tcg: Add earlyclobber to op_add2 for x86 and s390x
tcg/ppc: Fix race in goto_tb implementation
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmS+O7cdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8qrAf/VeAFnMbtantUTfM5
# zOcfBlutsDlJrNwA/ajFDrPwUDewP7s5cqxImAYqhXfhqlc2RIB3UiMCgSaQ+q6O
# MBOH0bEj/zbeIlwRX07ZBWhUYVdqJVd7Nxb1W19YwgG9yieWUxa+Xo1i2fhyXMv+
# 20VOFB1dPnxYyUMrzh/bSiHE90JFZktO1WzV10FRD+IpnImY9R+YGdpGTpVzUhor
# ReRHTkMKyYilY6EEUG2gFhotrY/bbSSSFyl9BcQjkZh11603nAN0mNKxtSjPJnNB
# rXhCVEgmbbBvCufsO6szQ03W/7RZ/KCg/DyKqxyCP1Ril4BIOx3tiucROcapXH/K
# 0y/ycA==
# =hdk/
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 24 Jul 2023 09:52:07 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu:
accel/tcg: Fix type of 'last' for pageflags_{find,next}
accel/tcg: Zero-pad vaddr in tlb_debug output
tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output
accel/tcg: Take mmap_lock in load_atomic*_or_exit
accel/tcg: Fix sense of read-only probes in ldst_atomicity
include/exec: Add WITH_MMAP_LOCK_GUARD
tcg/ppc: Fix race in goto_tb implementation
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | accel/tcg/cputlb.c | 20 | ||||
-rw-r--r-- | accel/tcg/ldst_atomicity.c.inc | 32 | ||||
-rw-r--r-- | accel/tcg/user-exec.c | 4 | ||||
-rw-r--r-- | bsd-user/mmap.c | 1 | ||||
-rw-r--r-- | include/exec/exec-all.h | 10 | ||||
-rw-r--r-- | linux-user/mmap.c | 1 | ||||
-rw-r--r-- | tcg/i386/tcg-target-con-set.h | 5 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 2 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c.inc | 9 | ||||
-rw-r--r-- | tcg/s390x/tcg-target-con-set.h | 8 | ||||
-rw-r--r-- | tcg/s390x/tcg-target.c.inc | 4 | ||||
-rw-r--r-- | tcg/tcg.c | 8 |
12 files changed, 66 insertions, 38 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e0079c9a9d..ba44501a7c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -497,8 +497,8 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page) /* Check if we need to flush due to large pages. */ if ((page & lp_mask) == lp_addr) { - tlb_debug("forcing full flush midx %d (%" - VADDR_PRIx "/%" VADDR_PRIx ")\n", + tlb_debug("forcing full flush midx %d (%016" + VADDR_PRIx "/%016" VADDR_PRIx ")\n", midx, lp_addr, lp_mask); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); } else { @@ -527,7 +527,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, assert_cpu_is_self(cpu); - tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); + tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { @@ -591,7 +591,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap) { - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -625,7 +625,7 @@ void tlb_flush_page(CPUState *cpu, vaddr addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr, uint16_t idxmap) { - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -666,7 +666,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, vaddr addr, uint16_t idxmap) { - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ addr &= 
TARGET_PAGE_MASK; @@ -728,7 +728,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, */ if (mask < f->mask || len > f->mask) { tlb_debug("forcing full flush midx %d (" - "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n", + "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n", midx, addr, mask, len); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; @@ -741,7 +741,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, */ if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { tlb_debug("forcing full flush midx %d (" - "%" VADDR_PRIx "/%" VADDR_PRIx ")\n", + "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n", midx, d->large_page_addr, d->large_page_mask); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; @@ -773,7 +773,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, assert_cpu_is_self(cpu); - tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n", + tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n", d.addr, d.bits, d.len, d.idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); @@ -1165,7 +1165,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, &xlat, &sz, full->attrs, &prot); assert(sz >= TARGET_PAGE_SIZE); - tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx + tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx " prot=%x idx=%d\n", addr, full->phys_addr, prot, mmu_idx); diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc index 4de0a80492..e5c590a499 100644 --- a/accel/tcg/ldst_atomicity.c.inc +++ b/accel/tcg/ldst_atomicity.c.inc @@ -159,9 +159,11 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) * another process, because the fallback start_exclusive solution * provides no protection across processes. 
*/ - if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { - uint64_t *p = __builtin_assume_aligned(pv, 8); - return *p; + WITH_MMAP_LOCK_GUARD() { + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { + uint64_t *p = __builtin_assume_aligned(pv, 8); + return *p; + } } #endif @@ -186,25 +188,27 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) return atomic16_read_ro(p); } -#ifdef CONFIG_USER_ONLY /* * We can only use cmpxchg to emulate a load if the page is writable. * If the page is not writable, then assume the value is immutable * and requires no locking. This ignores the case of MAP_SHARED with * another process, because the fallback start_exclusive solution * provides no protection across processes. + * + * In system mode all guest pages are writable. For user mode, + * we must take mmap_lock so that the query remains valid until + * the write is complete -- tests/tcg/multiarch/munmap-pthread.c + * is an example that can race. */ - if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { - return *p; - } + WITH_MMAP_LOCK_GUARD() { +#ifdef CONFIG_USER_ONLY + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { + return *p; + } #endif - - /* - * In system mode all guest pages are writable, and for user-only - * we have just checked writability. Try cmpxchg. - */ - if (HAVE_ATOMIC128_RW) { - return atomic16_read_rw(p); + if (HAVE_ATOMIC128_RW) { + return atomic16_read_rw(p); + } } /* Ultimate fallback: re-execute in serial context. 
*/ diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index ac38c2bf96..ab48cb41e4 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -144,7 +144,7 @@ typedef struct PageFlagsNode { static IntervalTreeRoot pageflags_root; -static PageFlagsNode *pageflags_find(target_ulong start, target_long last) +static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) { IntervalTreeNode *n; @@ -153,7 +153,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last) } static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start, - target_long last) + target_ulong last) { IntervalTreeNode *n; diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index aca8764356..74ed00b9fe 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -32,6 +32,7 @@ void mmap_lock(void) void mmap_unlock(void) { + assert(mmap_lock_count > 0); if (--mmap_lock_count == 0) { pthread_mutex_unlock(&mmap_mutex); } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 5fa0687cd2..d02517e95f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -629,6 +629,15 @@ void TSA_NO_TSA mmap_lock(void); void TSA_NO_TSA mmap_unlock(void); bool have_mmap_lock(void); +static inline void mmap_unlock_guard(void *unused) +{ + mmap_unlock(); +} + +#define WITH_MMAP_LOCK_GUARD() \ + for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \ + = (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1) + /** * adjust_signal_pc: * @pc: raw pc from the host signal ucontext_t. 
@@ -683,6 +692,7 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, #else static inline void mmap_lock(void) {} static inline void mmap_unlock(void) {} +#define WITH_MMAP_LOCK_GUARD() void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); void tlb_set_dirty(CPUState *cpu, vaddr addr); diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 44b53bd446..a5dfb56545 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -36,6 +36,7 @@ void mmap_lock(void) void mmap_unlock(void) { + assert(mmap_lock_count > 0); if (--mmap_lock_count == 0) { pthread_mutex_unlock(&mmap_mutex); } diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 91ceb0e1da..5ea3a292f0 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -11,6 +11,9 @@ * * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs, * except that the output must use a new register. + * + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> + * inputs, except that the first <n> outputs must use new registers. 
*/ C_O0_I1(r) C_O0_I2(L, L) @@ -53,4 +56,4 @@ C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) C_O2_I2(r, r, L, L) C_O2_I3(a, d, 0, 1, r) -C_O2_I4(r, r, 0, 1, re, re) +C_N1_O1_I4(r, r, 0, 1, re, re) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index ab997b5fb3..77482da070 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3335,7 +3335,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add2_i64: case INDEX_op_sub2_i32: case INDEX_op_sub2_i64: - return C_O2_I4(r, r, 0, 1, re, re); + return C_N1_O1_I4(r, r, 0, 1, re, re); case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index c866f2c997..511e14b180 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2496,11 +2496,10 @@ static void tcg_out_goto_tb(TCGContext *s, int which) ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); - /* Direct branch will be patched by tb_target_set_jmp_target. */ + /* TODO: Use direct branches when possible. */ set_jmp_insn_offset(s, which); tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); - /* When branch is out of range, fall through to indirect. */ tcg_out32(s, BCCTR | BO_ALWAYS); /* For the unlinked case, need to reset TCG_REG_TB. */ @@ -2528,10 +2527,12 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, intptr_t diff = addr - jmp_rx; tcg_insn_unit insn; + if (USE_REG_TB) { + return; + } + if (in_range_b(diff)) { insn = B | (diff & 0x3fffffc); - } else if (USE_REG_TB) { - insn = MTSPR | RS(TCG_REG_TB) | CTR; } else { insn = NOP; } diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index cbad91b2b5..9a42037499 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -8,6 +8,9 @@ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs. 
* Each operand should be a sequence of constraint letters as defined by * tcg-target-con-str.h; the constraint combination is inclusive or. + * + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> + * inputs, except that the first <n> outputs must use new registers. */ C_O0_I1(r) C_O0_I2(r, r) @@ -41,6 +44,5 @@ C_O2_I1(o, m, r) C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) -C_O2_I4(r, r, 0, 1, rA, r) -C_O2_I4(r, r, 0, 1, ri, r) -C_O2_I4(r, r, 0, 1, r, r) +C_N1_O1_I4(r, r, 0, 1, ri, r) +C_N1_O1_I4(r, r, 0, 1, rA, r) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index a878acd8ca..a94f7908d6 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3229,11 +3229,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - return C_O2_I4(r, r, 0, 1, ri, r); + return C_N1_O1_I4(r, r, 0, 1, ri, r); case INDEX_op_add2_i64: case INDEX_op_sub2_i64: - return C_O2_I4(r, r, 0, 1, rA, r); + return C_N1_O1_I4(r, r, 0, 1, rA, r); case INDEX_op_st_vec: return C_O0_I2(v, r); @@ -648,6 +648,7 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { #include "tcg-target-con-set.h" @@ -668,6 +669,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #undef C_O2_I2 #undef C_O2_I3 #undef C_O2_I4 +#undef C_N1_O1_I4 /* Put all of the constraint sets into an array, indexed by the enum. 
*/ @@ -687,6 +689,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, static const TCGTargetOpDef constraint_sets[] = { #include "tcg-target-con-set.h" @@ -706,6 +709,7 @@ static const TCGTargetOpDef constraint_sets[] = { #undef C_O2_I2 #undef C_O2_I3 #undef C_O2_I4 +#undef C_N1_O1_I4 /* Expand the enumerator to be returned from tcg_target_op_def(). */ @@ -725,6 +729,7 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) #include "tcg-target.c.inc" @@ -4703,7 +4708,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) * dead after the instruction, we must allocate a new * register and move it. */ - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { + if (temp_readonly(ts) || !IS_DEAD_ARG(i) + || def->args_ct[arg_ct->alias_index].newreg) { allocate_new_reg = true; } else if (ts->val_type == TEMP_VAL_REG) { /* |