From 1f6f2b34ad2b9522e3db898550c90dfc4b4c44ed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 13 Sep 2019 11:29:35 -0400 Subject: exec: Use TARGET_PAGE_BITS_MIN for TLB flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These bits do not need to vary with the actual page size used by the guest. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- include/exec/cpu-all.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d2d443c4f9..e0c8dc540c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -317,20 +317,24 @@ CPUArchState *cpu_copy(CPUArchState *env); #if !defined(CONFIG_USER_ONLY) -/* Flags stored in the low bits of the TLB virtual address. These are - * defined so that fast path ram access is all zeros. +/* + * Flags stored in the low bits of the TLB virtual address. + * These are defined so that fast path ram access is all zeros. * The flags all must be between TARGET_PAGE_BITS and * maximum address alignment bit. + * + * Use TARGET_PAGE_BITS_MIN so that these bits are constant + * when TARGET_PAGE_BITS_VARY is in effect. */ /* Zero if TLB entry is valid. */ -#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS - 1)) +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1)) /* Set if TLB entry references a clean RAM page. The iotlb entry will contain the page physical address. */ -#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2)) +#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) /* Set if TLB entry is an IO callback. */ -#define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3)) +#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) /* Set if TLB entry contains a watchpoint. */ -#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS - 4)) +#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Use this mask to check interception with an alignment mask * in a TCG backend. -- cgit v1.2.3 From c6b716cdc0f993e43b19849df1f1698723132246 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 10 Sep 2019 12:02:36 -0400 Subject: cputlb: Disable __always_inline__ without optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This forced inlining can result in missing symbols, which makes a debugging build harder to follow. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Reported-by: Peter Maydell Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 4 ++-- include/qemu/compiler.h | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index abae79650c..2222b87764 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1281,7 +1281,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr); -static inline uint64_t __attribute__((always_inline)) +static inline uint64_t QEMU_ALWAYS_INLINE load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr, MemOp op, bool code_read, FullLoadHelper *full_load) @@ -1530,7 +1530,7 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, * Store Helpers */ -static inline void __attribute__((always_inline)) +static inline void QEMU_ALWAYS_INLINE store_helper(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) { diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 09fc44cca4..20780e722d 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -170,6 +170,17 @@ # define QEMU_NONSTRING #endif +/* + * Forced inlining may be desired to encourage constant propagation + * of function parameters. However, it can also make debugging harder, + * so disable it for a non-optimizing build. + */ +#if defined(__OPTIMIZE__) +#define QEMU_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define QEMU_ALWAYS_INLINE +#endif + /* Implement C11 _Generic via GCC builtins. Example: * * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) -- cgit v1.2.3 From 560e36d5a42e04aa2871c01da6d432592151f7e8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 23 Sep 2019 11:24:12 -0700 Subject: qemu/compiler.h: Add qemu_build_not_reached MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use this as a compile-time assert that a particular code path is not reachable. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- include/qemu/compiler.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 20780e722d..7b93c73340 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -221,4 +221,19 @@ #define QEMU_GENERIC9(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC8(x, __VA_ARGS__)) #define QEMU_GENERIC10(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC9(x, __VA_ARGS__)) +/** + * qemu_build_not_reached() + * + * The compiler, during optimization, is expected to prove that a call + * to this function cannot be reached and remove it. If the compiler + * supports QEMU_ERROR, this will be reported at compile time; otherwise + * this will be reported at link time due to the missing symbol. + */ +#ifdef __OPTIMIZE__ +extern void QEMU_NORETURN QEMU_ERROR("code path is reachable") + qemu_build_not_reached(void); +#else +#define qemu_build_not_reached() g_assert_not_reached() +#endif + #endif /* COMPILER_H */ -- cgit v1.2.3 From 86b7b9c507b52a430fe3c644c0afbc6adfb92869 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 23 Sep 2019 14:14:31 -0700 Subject: cputlb: Use qemu_build_not_reached in load/store_helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the current runtime assert to a compile-time assert. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 2222b87764..e31378bce3 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1396,7 +1396,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, res = ldq_le_p(haddr); break; default: - g_assert_not_reached(); + qemu_build_not_reached(); } return res; @@ -1680,8 +1680,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, stq_le_p(haddr, val); break; default: - g_assert_not_reached(); - break; + qemu_build_not_reached(); } } -- cgit v1.2.3 From 80d9d1c678557df75587359dc99fda1441fabee2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 10 Sep 2019 14:56:12 -0400 Subject: cputlb: Split out load/store_memop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will shortly be using these more than once. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 107 +++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e31378bce3..eeba8c9847 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1281,6 +1281,29 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr); +static inline uint64_t QEMU_ALWAYS_INLINE +load_memop(const void *haddr, MemOp op) +{ + switch (op) { + case MO_UB: + return ldub_p(haddr); + case MO_BEUW: + return lduw_be_p(haddr); + case MO_LEUW: + return lduw_le_p(haddr); + case MO_BEUL: + return (uint32_t)ldl_be_p(haddr); + case MO_LEUL: + return (uint32_t)ldl_le_p(haddr); + case MO_BEQ: + return ldq_be_p(haddr); + case MO_LEQ: + return ldq_le_p(haddr); + default: + qemu_build_not_reached(); + } +} + static inline uint64_t QEMU_ALWAYS_INLINE load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr, MemOp op, bool code_read, @@ -1373,33 +1396,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); - switch (op) { - case MO_UB: - res = ldub_p(haddr); - break; - case MO_BEUW: - res = lduw_be_p(haddr); - break; - case MO_LEUW: - res = lduw_le_p(haddr); - break; - case MO_BEUL: - res = (uint32_t)ldl_be_p(haddr); - break; - case MO_LEUL: - res = (uint32_t)ldl_le_p(haddr); - break; - case MO_BEQ: - res = ldq_be_p(haddr); - break; - case MO_LEQ: - res = ldq_le_p(haddr); - break; - default: - qemu_build_not_reached(); - } - - return res; + return load_memop(haddr, op); } /* @@ -1530,6 +1527,36 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, * Store Helpers */ +static inline void QEMU_ALWAYS_INLINE +store_memop(void *haddr, uint64_t val, MemOp op) +{ + switch (op) { + case MO_UB: + stb_p(haddr, val); + break; + case MO_BEUW: + stw_be_p(haddr, val); + break; + case MO_LEUW: + stw_le_p(haddr, val); + break; + case MO_BEUL: + stl_be_p(haddr, val); + break; + case MO_LEUL: + stl_le_p(haddr, val); + break; + case MO_BEQ: + stq_be_p(haddr, val); + break; + case MO_LEQ: + stq_le_p(haddr, val); + break; + default: + qemu_build_not_reached(); + } +} + static inline void QEMU_ALWAYS_INLINE store_helper(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) @@ -1657,31 +1684,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); - switch (op) { - case MO_UB: - stb_p(haddr, val); - break; - case MO_BEUW: - stw_be_p(haddr, val); - break; - case MO_LEUW: - stw_le_p(haddr, val); - break; - case MO_BEUL: - stl_be_p(haddr, val); - break; - case MO_LEUL: - stl_le_p(haddr, val); - break; - case MO_BEQ: - stq_be_p(haddr, val); - break; - case MO_LEQ: - stq_le_p(haddr, val); - break; - default: - qemu_build_not_reached(); - } + store_memop(haddr, val, op); } void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, -- cgit v1.2.3 From 5b87b3e671c00fd054bcbf4668d99d56b33c598c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 10 Sep 2019 15:47:39 -0400 Subject: cputlb: Introduce TLB_BSWAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle bswap on ram directly in load/store_helper. This fixes a bug with the previous implementation in that one cannot use the I/O path for RAM. Fixes: a26fc6f5152b47f1 Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 72 ++++++++++++++++++++++++++++++-------------------- include/exec/cpu-all.h | 4 ++- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index eeba8c9847..028eebcb44 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -737,8 +737,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, address |= TLB_INVALID_MASK; } if (attrs.byte_swap) { - /* Force the access through the I/O slow path. */ - address |= TLB_MMIO; + address |= TLB_BSWAP; } if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) { @@ -901,10 +900,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (iotlbentry->attrs.byte_swap) { - op ^= MO_BSWAP; - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -947,10 +942,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (iotlbentry->attrs.byte_swap) { - op ^= MO_BSWAP; - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -1133,8 +1124,8 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, wp_access, retaddr); } - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) { - /* I/O access */ + /* Reject I/O access, or other required slow-path. */ + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) { return NULL; } @@ -1344,6 +1335,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { CPUIOTLBEntry *iotlbentry; + bool need_swap; /* For anything that is unaligned, recurse through full_load. */ if ((addr & (size - 1)) != 0) { @@ -1357,17 +1349,27 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, /* On watchpoint hit, this will longjmp out. */ cpu_check_watchpoint(env_cpu(env), addr, size, iotlbentry->attrs, BP_MEM_READ, retaddr); - - /* The backing page may or may not require I/O. */ - tlb_addr &= ~TLB_WATCHPOINT; - if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { - goto do_aligned_access; - } } + need_swap = size > 1 && (tlb_addr & TLB_BSWAP); + /* Handle I/O access. */ - return io_readx(env, iotlbentry, mmu_idx, addr, - retaddr, access_type, op); + if (likely(tlb_addr & TLB_MMIO)) { + return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, + access_type, op ^ (need_swap * MO_BSWAP)); + } + + haddr = (void *)((uintptr_t)addr + entry->addend); + + /* + * Keep these two load_memop separate to ensure that the compiler + * is able to fold the entire function to a single instruction. + * There is a build-time assert inside to remind you of this. ;-) + */ + if (unlikely(need_swap)) { + return load_memop(haddr, op ^ MO_BSWAP); + } + return load_memop(haddr, op); } /* Handle slow unaligned access (it spans two pages or IO). */ @@ -1394,7 +1396,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, return res & MAKE_64BIT_MASK(0, size * 8); } - do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); return load_memop(haddr, op); } @@ -1591,6 +1592,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { CPUIOTLBEntry *iotlbentry; + bool need_swap; /* For anything that is unaligned, recurse through byte stores. */ if ((addr & (size - 1)) != 0) { @@ -1604,16 +1606,29 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* On watchpoint hit, this will longjmp out. */ cpu_check_watchpoint(env_cpu(env), addr, size, iotlbentry->attrs, BP_MEM_WRITE, retaddr); - - /* The backing page may or may not require I/O. */ - tlb_addr &= ~TLB_WATCHPOINT; - if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { - goto do_aligned_access; - } } + need_swap = size > 1 && (tlb_addr & TLB_BSWAP); + /* Handle I/O access. */ - io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op); + if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) { + io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, + op ^ (need_swap * MO_BSWAP)); + return; + } + + haddr = (void *)((uintptr_t)addr + entry->addend); + + /* + * Keep these two store_memop separate to ensure that the compiler + * is able to fold the entire function to a single instruction. + * There is a build-time assert inside to remind you of this. ;-) + */ + if (unlikely(need_swap)) { + store_memop(haddr, val, op ^ MO_BSWAP); + } else { + store_memop(haddr, val, op); + } return; } @@ -1682,7 +1697,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); store_memop(haddr, val, op); } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index e0c8dc540c..d148bded35 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -335,12 +335,14 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) /* Set if TLB entry contains a watchpoint. */ #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) +/* Set if TLB entry requires byte swap. */ +#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) /* Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ - (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT) + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP) /** * tlb_hit_page: return true if page aligned @addr is a hit against the -- cgit v1.2.3 From 6e050d415662ca97b71410b8c4d91f789872b407 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 18 Sep 2019 03:39:56 +0000 Subject: exec: Adjust notdirty tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory_region_tb_read tracepoint is unreachable, since notdirty is supposed to apply only to writes. The memory_region_tb_write tracepoint is mis-named, because notdirty is not only used for TB invalidation. It is also used for e.g. VGA RAM updates and migration. Replace memory_region_tb_write with memory_notdirty_write_access, and place it in memory_notdirty_write_prepare where it can catch all of the instances. Add memory_notdirty_set_dirty to log when we no longer intercept writes to a page. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- exec.c | 3 +++ memory.c | 4 ---- trace-events | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/exec.c b/exec.c index 8b998974f8..5f2587b621 100644 --- a/exec.c +++ b/exec.c @@ -2755,6 +2755,8 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi, ndi->size = size; ndi->pages = NULL; + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + assert(tcg_enabled()); if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { ndi->pages = page_collection_lock(ram_addr, ram_addr + size); @@ -2779,6 +2781,7 @@ void memory_notdirty_write_complete(NotDirtyInfo *ndi) /* we remove the notdirty callback only if the code has been flushed */ if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { + trace_memory_notdirty_set_dirty(ndi->mem_vaddr); tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); } } diff --git a/memory.c b/memory.c index b9dd6b94ca..57c44c97db 100644 --- a/memory.c +++ b/memory.c @@ -438,7 +438,6 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr, /* Accesses to code which has previously been translated into a TB show * up in the MMIO path, as accesses to the io_mem_notdirty * MemoryRegion. */ - trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -465,7 +464,6 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, /* Accesses to code which has previously been translated into a TB show * up in the MMIO path, as accesses to the io_mem_notdirty * MemoryRegion. */ - trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -490,7 +488,6 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr, /* Accesses to code which has previously been translated into a TB show * up in the MMIO path, as accesses to the io_mem_notdirty * MemoryRegion. */ - trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); @@ -515,7 +512,6 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, /* Accesses to code which has previously been translated into a TB show * up in the MMIO path, as accesses to the io_mem_notdirty * MemoryRegion. */ - trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); diff --git a/trace-events b/trace-events index 823a4ae64e..20821ba545 100644 --- a/trace-events +++ b/trace-events @@ -52,14 +52,14 @@ dma_map_wait(void *dbs) "dbs=%p" find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64 find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64 ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d" +memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u" +memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64 # memory.c memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr 0x%"PRIx64" value 0x%"PRIx64" size %u" memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" flatview_new(void *view, void *root) "%p (root %p)" -- cgit v1.2.3 From 7b0d792ce13c0e894ee32a94b321b329724c9a25 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 17:54:10 -0700 Subject: cputlb: Move ROM handling from I/O path to TLB path It does not require going through the whole I/O path in order to discard a write. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 36 +++++++++++++++++++++--------------- exec.c | 41 +---------------------------------------- include/exec/cpu-all.h | 5 ++++- include/exec/cpu-common.h | 1 - 4 files changed, 26 insertions(+), 57 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 028eebcb44..404ec57a4e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -577,7 +577,8 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, { uintptr_t addr = tlb_entry->addr_write; - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | + TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { @@ -745,7 +746,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, address |= TLB_MMIO; addend = 0; } else { - /* TLB_MMIO for rom/romd handled below */ addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } @@ -822,16 +822,17 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, tn.addr_write = -1; if (prot & PAGE_WRITE) { - if ((memory_region_is_ram(section->mr) && section->readonly) - || memory_region_is_romd(section->mr)) { - /* Write access calls the I/O callback. */ - tn.addr_write = address | TLB_MMIO; - } else if (memory_region_is_ram(section->mr) - && cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write = address | TLB_NOTDIRTY; - } else { - tn.addr_write = address; + tn.addr_write = address; + if (memory_region_is_romd(section->mr)) { + /* Use the MMIO path so that the device can switch states. */ + tn.addr_write |= TLB_MMIO; + } else if (memory_region_is_ram(section->mr)) { + if (section->readonly) { + tn.addr_write |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean( + memory_region_get_ram_addr(section->mr) + xlat)) { + tn.addr_write |= TLB_NOTDIRTY; + } } if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; @@ -904,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -945,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1125,7 +1126,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, } /* Reject I/O access, or other required slow-path. */ - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) { + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { return NULL; } @@ -1617,6 +1618,11 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } + /* Ignore writes to ROM. */ + if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { + return; + } + haddr = (void *)((uintptr_t)addr + entry->addend); /* diff --git a/exec.c b/exec.c index 5f2587b621..ea8c0b18ac 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,7 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_rom, io_mem_notdirty; +MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -192,7 +192,6 @@ typedef struct subpage_t { #define PHYS_SECTION_UNASSIGNED 0 #define PHYS_SECTION_NOTDIRTY 1 -#define PHYS_SECTION_ROM 2 static void io_mem_init(void); static void memory_map_init(void); @@ -1475,8 +1474,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, iotlb = memory_region_get_ram_addr(section->mr) + xlat; if (!section->readonly) { iotlb |= PHYS_SECTION_NOTDIRTY; - } else { - iotlb |= PHYS_SECTION_ROM; } } else { AddressSpaceDispatch *d; @@ -3002,38 +2999,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } -static void readonly_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - /* Ignore any write to ROM. */ -} - -static bool readonly_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -/* This will only be used for writes, because reads are special cased - * to directly access the underlying host ram. - */ -static const MemoryRegionOps readonly_mem_ops = { - .write = readonly_mem_write, - .valid.accepts = readonly_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { @@ -3047,8 +3012,6 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu, static void io_mem_init(void) { - memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops, - NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); @@ -3069,8 +3032,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) assert(n == PHYS_SECTION_UNASSIGNED); n = dummy_section(&d->map, fv, &io_mem_notdirty); assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&d->map, fv, &io_mem_rom); - assert(n == PHYS_SECTION_ROM); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d148bded35..ad9ab85eb3 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -337,12 +337,15 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Set if TLB entry requires byte swap. */ #define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) +/* Set if TLB entry writes ignored. */ +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) /* Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ - (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP) + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ + | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) /** * tlb_hit_page: return true if page aligned @addr is a hit against the diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index f7dbe75fbc..1c0e03ddc2 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,7 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); -- cgit v1.2.3 From 08565552f70ca37da13f24928727f851073cd13e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 18 Sep 2019 09:15:44 -0700 Subject: cputlb: Move NOTDIRTY handling from I/O path to TLB path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pages that we want to track for NOTDIRTY are RAM. We do not really need to go through the I/O path to handle them. Acked-by: David Hildenbrand Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 26 +++++++++++++++++++++--- exec.c | 50 ----------------------------------------------- include/exec/cpu-common.h | 2 -- memory.c | 16 --------------- 4 files changed, 23 insertions(+), 71 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 404ec57a4e..7e9a0f7ac8 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -905,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -946,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1612,7 +1612,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, need_swap = size > 1 && (tlb_addr & TLB_BSWAP); /* Handle I/O access. */ - if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) { + if (tlb_addr & TLB_MMIO) { io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op ^ (need_swap * MO_BSWAP)); return; @@ -1625,6 +1625,26 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, haddr = (void *)((uintptr_t)addr + entry->addend); + /* Handle clean RAM pages. */ + if (tlb_addr & TLB_NOTDIRTY) { + NotDirtyInfo ndi; + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + env_cpu(env)->mem_io_pc = retaddr; + + memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, + addr + iotlbentry->addr, size); + + if (unlikely(need_swap)) { + store_memop(haddr, val, op ^ MO_BSWAP); + } else { + store_memop(haddr, val, op); + } + + memory_notdirty_write_complete(&ndi); + return; + } + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. diff --git a/exec.c b/exec.c index ea8c0b18ac..dc7001f115 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,6 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -191,7 +190,6 @@ typedef struct subpage_t { } subpage_t; #define PHYS_SECTION_UNASSIGNED 0 -#define PHYS_SECTION_NOTDIRTY 1 static void io_mem_init(void); static void memory_map_init(void); @@ -1472,9 +1470,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, if (memory_region_is_ram(section->mr)) { /* Normal RAM. */ iotlb = memory_region_get_ram_addr(section->mr) + xlat; - if (!section->readonly) { - iotlb |= PHYS_SECTION_NOTDIRTY; - } } else { AddressSpaceDispatch *d; @@ -2783,42 +2778,6 @@ void memory_notdirty_write_complete(NotDirtyInfo *ndi) } } -/* Called within RCU critical section. */ -static void notdirty_mem_write(void *opaque, hwaddr ram_addr, - uint64_t val, unsigned size) -{ - NotDirtyInfo ndi; - - memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr, - ram_addr, size); - - stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val); - memory_notdirty_write_complete(&ndi); -} - -static bool notdirty_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -static const MemoryRegionOps notdirty_mem_ops = { - .write = notdirty_mem_write, - .valid.accepts = notdirty_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) @@ -3014,13 +2973,6 @@ static void io_mem_init(void) { memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); - - /* io_mem_notdirty calls tb_invalidate_phys_page_fast, - * which can be called without the iothread mutex. - */ - memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, - NULL, UINT64_MAX); - memory_region_clear_global_locking(&io_mem_notdirty); } AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) @@ -3030,8 +2982,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) n = dummy_section(&d->map, fv, &io_mem_unassigned); assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&d->map, fv, &io_mem_notdirty); - assert(n == PHYS_SECTION_NOTDIRTY); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 1c0e03ddc2..81753bbb34 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,8 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_notdirty; - typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); diff --git a/memory.c b/memory.c index 57c44c97db..a99b8c0767 100644 --- a/memory.c +++ b/memory.c @@ -434,10 +434,6 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr, tmp = mr->ops->read(mr->opaque, addr, size); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -460,10 +456,6 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -484,10 +476,6 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); @@ -508,10 +496,6 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); -- cgit v1.2.3 From 8f5db6415363740d4eac070bb381202c80a7fc34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 21:09:58 -0700 Subject: cputlb: Partially inline memory_region_section_get_iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one caller, tlb_set_page_with_attrs. We cannot inline the entire function because the AddressSpaceDispatch structure is private to exec.c, and cannot easily be moved to include/exec/memory-internal.h. Compute is_ram and is_romd once within tlb_set_page_with_attrs. Fold the number of tests against these predicates. Compute cpu_physical_memory_is_clean outside of the tlb lock region. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 66 +++++++++++++++++++++++++++++++------------------ exec.c | 22 +++-------------- include/exec/exec-all.h | 6 +---- 3 files changed, 46 insertions(+), 48 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 7e9a0f7ac8..4f118d2cc9 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -705,13 +705,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, MemoryRegionSection *section; unsigned int index; target_ulong address; - target_ulong code_address; + target_ulong write_address; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; int asidx = cpu_asidx_from_attrs(cpu, attrs); int wp_flags; + bool is_ram, is_romd; assert_cpu_is_self(cpu); @@ -740,18 +741,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, if (attrs.byte_swap) { address |= TLB_BSWAP; } - if (!memory_region_is_ram(section->mr) && - !memory_region_is_romd(section->mr)) { - /* IO memory case */ - address |= TLB_MMIO; + + is_ram = memory_region_is_ram(section->mr); + is_romd = memory_region_is_romd(section->mr); + + if (is_ram || is_romd) { + /* RAM and ROMD both have associated host memory. */ + addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + } else { + /* I/O does not; force the host address to NULL. */ addend = 0; + } + + write_address = address; + if (is_ram) { + iotlb = memory_region_get_ram_addr(section->mr) + xlat; + /* + * Computing is_clean is expensive; avoid all that unless + * the page is actually writable. + */ + if (prot & PAGE_WRITE) { + if (section->readonly) { + write_address |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean(iotlb)) { + write_address |= TLB_NOTDIRTY; + } + } } else { - addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + /* I/O or ROMD */ + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; + /* + * Writes to romd devices must go through MMIO to enable write. + * Reads to romd devices go through the ram_ptr found above, + * but of course reads to I/O must go through MMIO. + */ + write_address |= TLB_MMIO; + if (!is_romd) { + address = write_address; + } } - code_address = address; - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, - paddr_page, xlat, prot, &address); wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, TARGET_PAGE_SIZE); @@ -791,8 +820,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, /* * At this point iotlb contains a physical section number in the lower * TARGET_PAGE_BITS, and either - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) - * + the offset within section->mr of the page base (otherwise) + * + the ram_addr_t of the page base of the target RAM (RAM) + * + the offset within section->mr of the page base (I/O, ROMD) * We subtract the vaddr_page (which is page aligned and thus won't * disturb the low bits) to give an offset which can be added to the * (non-page-aligned) vaddr of the eventual memory access to get @@ -815,25 +844,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } if (prot & PAGE_EXEC) { - tn.addr_code = code_address; + tn.addr_code = address; } else { tn.addr_code = -1; } tn.addr_write = -1; if (prot & PAGE_WRITE) { - tn.addr_write = address; - if (memory_region_is_romd(section->mr)) { - /* Use the MMIO path so that the device can switch states. */ - tn.addr_write |= TLB_MMIO; - } else if (memory_region_is_ram(section->mr)) { - if (section->readonly) { - tn.addr_write |= TLB_DISCARD_WRITE; - } else if (cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write |= TLB_NOTDIRTY; - } - } + tn.addr_write = write_address; if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; } diff --git a/exec.c b/exec.c index dc7001f115..961d7d6497 100644 --- a/exec.c +++ b/exec.c @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address) + MemoryRegionSection *section) { - hwaddr iotlb; - - if (memory_region_is_ram(section->mr)) { - /* Normal RAM. */ - iotlb = memory_region_get_ram_addr(section->mr) + xlat; - } else { - AddressSpaceDispatch *d; - - d = flatview_to_dispatch(section->fv); - iotlb = section - d->map.sections; - iotlb += xlat; - } - - return iotlb; + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; } #endif /* defined(CONFIG_USER_ONLY) */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 81b02eb2fe..49db07ba0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen, MemTxAttrs attrs, int *prot); hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address); + MemoryRegionSection *section); #endif /* vl.c */ -- cgit v1.2.3 From 707526ad865dc4064c3984bcc061596a21bf9d3b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 18:47:59 -0700 Subject: cputlb: Merge and move memory_notdirty_write_{prepare,complete} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9458a9a1df1a, all readers of the dirty bitmaps wait for the rcu lock, which means that they wait until the end of any executing TranslationBlock. As a consequence, there is no need for the actual access to happen in between the _prepare and _complete. Therefore, we can improve things by merging the two functions into notdirty_write and dropping the NotDirtyInfo structure. In addition, the only users of notdirty_write are in cputlb.c, so move the merged function there. Pass in the CPUIOTLBEntry from which the ram_addr_t may be computed. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 +++++++++++++++++++++++------------------- exec.c | 44 ------------------------ include/exec/memory-internal.h | 65 ------------------------------------ 3 files changed, 42 insertions(+), 143 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4f118d2cc9..3e91838519 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -33,6 +33,7 @@ #include "exec/helper-proto.h" #include "qemu/atomic.h" #include "qemu/atomic128.h" +#include "translate-all.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1085,6 +1086,37 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, + CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) +{ + ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + + if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { + struct page_collection *pages + = page_collection_lock(ram_addr, ram_addr + size); + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + cpu->mem_io_pc = retaddr; + + tb_invalidate_phys_page_fast(pages, ram_addr, size); + page_collection_unlock(pages); + } + + /* + * Set both VGA and migration bits for simplicity and to remove + * the notdirty callback faster. + */ + cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); + + /* We remove the notdirty callback only if the code has been flushed. */ + if (!cpu_physical_memory_is_clean(ram_addr)) { + trace_memory_notdirty_set_dirty(mem_vaddr); + tlb_set_dirty(cpu, mem_vaddr); + } +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1204,8 +1236,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, /* Probe for a read-modify-write atomic operation. Do not allow unaligned * operations, or io operations to proceed. Return the host address. */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr, - NotDirtyInfo *ndi) + TCGMemOpIdx oi, uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -1265,12 +1296,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - ndi->active = false; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - ndi->active = true; - memory_notdirty_write_prepare(ndi, env_cpu(env), addr, - qemu_ram_addr_from_host_nofail(hostaddr), - 1 << s_bits); + notdirty_write(env_cpu(env), addr, 1 << s_bits, + &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } return hostaddr; @@ -1641,28 +1669,13 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - haddr = (void *)((uintptr_t)addr + entry->addend); - /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - NotDirtyInfo ndi; - - /* We require mem_io_pc in tb_invalidate_phys_page_range. */ - env_cpu(env)->mem_io_pc = retaddr; - - memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, - addr + iotlbentry->addr, size); - - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); - } - - memory_notdirty_write_complete(&ndi); - return; + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); } + haddr = (void *)((uintptr_t)addr + entry->addend); + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. @@ -1793,14 +1806,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS NotDirtyInfo ndi -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi) -#define ATOMIC_MMU_CLEANUP \ - do { \ - if (unlikely(ndi.active)) { \ - memory_notdirty_write_complete(&ndi); \ - } \ - } while (0) +#define ATOMIC_MMU_DECLS +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) +#define ATOMIC_MMU_CLEANUP #define DATA_SIZE 1 #include "atomic_template.h" @@ -1828,7 +1836,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #undef ATOMIC_MMU_LOOKUP #define EXTRA_ARGS , TCGMemOpIdx oi #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) #define DATA_SIZE 1 #include "atomic_template.h" diff --git a/exec.c b/exec.c index 961d7d6497..7d835b1a2b 100644 --- a/exec.c +++ b/exec.c @@ -2718,50 +2718,6 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } -/* Called within RCU critical section. */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size) -{ - ndi->cpu = cpu; - ndi->ram_addr = ram_addr; - ndi->mem_vaddr = mem_vaddr; - ndi->size = size; - ndi->pages = NULL; - - trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); - - assert(tcg_enabled()); - if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->pages = page_collection_lock(ram_addr, ram_addr + size); - tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); - } -} - -/* Called within RCU critical section. */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi) -{ - if (ndi->pages) { - assert(tcg_enabled()); - page_collection_unlock(ndi->pages); - ndi->pages = NULL; - } - - /* Set both VGA and migration bits for simplicity and to remove - * the notdirty callback faster. - */ - cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size, - DIRTY_CLIENTS_NOCODE); - /* we remove the notdirty callback only if the code has been - flushed */ - if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { - trace_memory_notdirty_set_dirty(ndi->mem_vaddr); - tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); - } -} - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index ef4fb92371..9fcc2af25c 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -49,70 +49,5 @@ void address_space_dispatch_free(AddressSpaceDispatch *d); void mtree_print_dispatch(struct AddressSpaceDispatch *d, MemoryRegion *root); - -struct page_collection; - -/* Opaque struct for passing info from memory_notdirty_write_prepare() - * to memory_notdirty_write_complete(). Callers should treat all fields - * as private, with the exception of @active. - * - * @active is a field which is not touched by either the prepare or - * complete functions, but which the caller can use if it wishes to - * track whether it has called prepare for this struct and so needs - * to later call the complete function. - */ -typedef struct { - CPUState *cpu; - struct page_collection *pages; - ram_addr_t ram_addr; - vaddr mem_vaddr; - unsigned size; - bool active; -} NotDirtyInfo; - -/** - * memory_notdirty_write_prepare: call before writing to non-dirty memory - * @ndi: pointer to opaque NotDirtyInfo struct - * @cpu: CPU doing the write - * @mem_vaddr: virtual address of write - * @ram_addr: the ram address of the write - * @size: size of write in bytes - * - * Any code which writes to the host memory corresponding to - * guest RAM which has been marked as NOTDIRTY must wrap those - * writes in calls to memory_notdirty_write_prepare() and - * memory_notdirty_write_complete(): - * - * NotDirtyInfo ndi; - * memory_notdirty_write_prepare(&ndi, ....); - * ... perform write here ... - * memory_notdirty_write_complete(&ndi); - * - * These calls will ensure that we flush any TCG translated code for - * the memory being written, update the dirty bits and (if possible) - * remove the slowpath callback for writing to the memory. - * - * This must only be called if we are using TCG; it will assert otherwise. - * - * We may take locks in the prepare call, so callers must ensure that - * they don't exit (via longjump or otherwise) without calling complete. - * - * This call must only be made inside an RCU critical section. - * (Note that while we're executing a TCG TB we're always in an - * RCU critical section, which is likely to be the case for callers - * of these functions.) - */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size); -/** - * memory_notdirty_write_complete: finish write to non-dirty memory - * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized - * by memory_not_dirty_write_prepare(). - */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi); - #endif #endif -- cgit v1.2.3 From 73bc0bd41b84aaee489118da1d20901d27f3654a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 19:28:48 -0700 Subject: cputlb: Handle TLB_NOTDIRTY in probe_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use notdirty_write for the write and return a valid host pointer for this case. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 3e91838519..b56e9ddf8c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1168,16 +1168,24 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, return NULL; } - /* Handle watchpoints. */ - if (tlb_addr & TLB_WATCHPOINT) { - cpu_check_watchpoint(env_cpu(env), addr, size, - env_tlb(env)->d[mmu_idx].iotlb[index].attrs, - wp_access, retaddr); - } + if (unlikely(tlb_addr & TLB_FLAGS_MASK)) { + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; - /* Reject I/O access, or other required slow-path. */ - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { - return NULL; + /* Reject I/O access, or other required slow-path. */ + if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { + return NULL; + } + + /* Handle watchpoints. */ + if (tlb_addr & TLB_WATCHPOINT) { + cpu_check_watchpoint(env_cpu(env), addr, size, + iotlbentry->attrs, wp_access, retaddr); + } + + /* Handle clean RAM pages. */ + if (tlb_addr & TLB_NOTDIRTY) { + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); + } } return (void *)((uintptr_t)addr + entry->addend); -- cgit v1.2.3 From b56668bbe15cb094b4d928870635d079825f3731 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 19:44:01 -0700 Subject: cputlb: Remove cpu->mem_io_vaddr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the merge of notdirty handling into store_helper, the last user of cpu->mem_io_vaddr was removed. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 2 -- hw/core/cpu.c | 1 - include/hw/core/cpu.h | 2 -- 3 files changed, 5 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index b56e9ddf8c..4b24811ce7 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -928,7 +928,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, cpu_io_recompile(cpu, retaddr); } - cpu->mem_io_vaddr = addr; cpu->mem_io_access_type = access_type; if (mr->global_locking && !qemu_mutex_iothread_locked()) { @@ -968,7 +967,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } - cpu->mem_io_vaddr = addr; cpu->mem_io_pc = retaddr; if (mr->global_locking && !qemu_mutex_iothread_locked()) { diff --git a/hw/core/cpu.c b/hw/core/cpu.c index 0035845511..73b1ee34d0 100644 --- a/hw/core/cpu.c +++ b/hw/core/cpu.c @@ -261,7 +261,6 @@ static void cpu_common_reset(CPUState *cpu) cpu->interrupt_request = 0; cpu->halted = 0; cpu->mem_io_pc = 0; - cpu->mem_io_vaddr = 0; cpu->icount_extra = 0; atomic_set(&cpu->icount_decr_ptr->u32, 0); cpu->can_do_io = 1; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index c7cda65c66..031f587e51 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -338,7 +338,6 @@ struct qemu_work_item; * @next_cpu: Next CPU sharing TB cache. * @opaque: User data. * @mem_io_pc: Host Program Counter at which the memory was accessed. - * @mem_io_vaddr: Target virtual address at which the memory was accessed. * @kvm_fd: vCPU file descriptor for KVM. * @work_mutex: Lock to prevent multiple access to queued_work_*. * @queued_work_first: First asynchronous work pending. @@ -413,7 +412,6 @@ struct CPUState { * we store some rarely used information in the CPU context. */ uintptr_t mem_io_pc; - vaddr mem_io_vaddr; /* * This is only needed for the legacy cpu_unassigned_access() hook; * when all targets using it have been converted to use -- cgit v1.2.3 From ce9f5e279223cf422eae8e228fe03d6f60e251a4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 20:03:36 -0700 Subject: cputlb: Remove tb_invalidate_phys_page_range is_cpu_write_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers pass false to this argument. Remove it and pass the constant on to tb_invalidate_phys_page_range__locked. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 6 ++---- accel/tcg/translate-all.h | 3 +-- exec.c | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..de4b697163 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1983,8 +1983,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * * Called with mmap_lock held for user-mode emulation */ -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end) { struct page_collection *pages; PageDesc *p; @@ -1996,8 +1995,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, return; } pages = page_collection_lock(start, end); - tb_invalidate_phys_page_range__locked(pages, p, start, end, - is_cpu_write_access); + tb_invalidate_phys_page_range__locked(pages, p, start, end, 0); page_collection_unlock(pages); } diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 64f5fd9a05..31f2117188 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -28,8 +28,7 @@ struct page_collection *page_collection_lock(tb_page_addr_t start, void page_collection_unlock(struct page_collection *set); void tb_invalidate_phys_page_fast(struct page_collection *pages, tb_page_addr_t start, int len); -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access); +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu); #ifdef CONFIG_USER_ONLY diff --git a/exec.c b/exec.c index 7d835b1a2b..b3df826039 100644 --- a/exec.c +++ b/exec.c @@ -1012,7 +1012,7 @@ const char *parse_cpu_option(const char *cpu_option) void tb_invalidate_phys_addr(target_ulong addr) { mmap_lock(); - tb_invalidate_phys_page_range(addr, addr + 1, 0); + tb_invalidate_phys_page_range(addr, addr + 1); mmap_unlock(); } @@ -1039,7 +1039,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) return; } ram_addr = memory_region_get_ram_addr(mr) + addr; - tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); rcu_read_unlock(); } -- cgit v1.2.3 From 5a7c27bb8a50a1b441a5b4c9d8e65727edec7f27 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 20:16:09 -0700 Subject: cputlb: Pass retaddr to tb_invalidate_phys_page_fast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than rely on cpu->mem_io_pc, pass retaddr down directly. Within tb_invalidate_phys_page_range__locked, the is_cpu_write_access parameter is non-zero exactly when retaddr would be non-zero, so that is a simple replacement. Recognize that current_tb_not_found is true only when mem_io_pc (and now retaddr) are also non-zero, so remove a redundant test. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 6 +----- accel/tcg/translate-all.c | 39 +++++++++++++++++++-------------------- accel/tcg/translate-all.h | 3 ++- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4b24811ce7..defc8d5929 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1094,11 +1094,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { struct page_collection *pages = page_collection_lock(ram_addr, ram_addr + size); - - /* We require mem_io_pc in tb_invalidate_phys_page_range. */ - cpu->mem_io_pc = retaddr; - - tb_invalidate_phys_page_fast(pages, ram_addr, size); + tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); page_collection_unlock(pages); } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index de4b697163..db77fb221b 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1889,7 +1889,7 @@ static void tb_invalidate_phys_page_range__locked(struct page_collection *pages, PageDesc *p, tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) + uintptr_t retaddr) { TranslationBlock *tb; tb_page_addr_t tb_start, tb_end; @@ -1897,9 +1897,9 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, #ifdef TARGET_HAS_PRECISE_SMC CPUState *cpu = current_cpu; CPUArchState *env = NULL; - int current_tb_not_found = is_cpu_write_access; + bool current_tb_not_found = retaddr != 0; + bool current_tb_modified = false; TranslationBlock *current_tb = NULL; - int current_tb_modified = 0; target_ulong current_pc = 0; target_ulong current_cs_base = 0; uint32_t current_flags = 0; @@ -1931,24 +1931,21 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (!(tb_end <= start || tb_start >= end)) { #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_not_found) { - current_tb_not_found = 0; - current_tb = NULL; - if (cpu->mem_io_pc) { - /* now we have a real cpu fault */ - current_tb = tcg_tb_lookup(cpu->mem_io_pc); - } + current_tb_not_found = false; + /* now we have a real cpu fault */ + current_tb = tcg_tb_lookup(retaddr); } if (current_tb == tb && (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { - /* If we are modifying the current TB, we must stop - its execution. We could be more precise by checking - that the modification is after the current PC, but it - would require a specialized function to partially - restore the CPU state */ - - current_tb_modified = 1; - cpu_restore_state_from_tb(cpu, current_tb, - cpu->mem_io_pc, true); + /* + * If we are modifying the current TB, we must stop + * its execution. We could be more precise by checking + * that the modification is after the current PC, but it + * would require a specialized function to partially + * restore the CPU state. + */ + current_tb_modified = true; + cpu_restore_state_from_tb(cpu, current_tb, retaddr, true); cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, ¤t_flags); } @@ -2042,7 +2039,8 @@ void tb_invalidate_phys_range(target_ulong start, target_ulong end) * Call with all @pages in the range [@start, @start + len[ locked. */ void tb_invalidate_phys_page_fast(struct page_collection *pages, - tb_page_addr_t start, int len) + tb_page_addr_t start, int len, + uintptr_t retaddr) { PageDesc *p; @@ -2069,7 +2067,8 @@ void tb_invalidate_phys_page_fast(struct page_collection *pages, } } else { do_invalidate: - tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1); + tb_invalidate_phys_page_range__locked(pages, p, start, start + len, + retaddr); } } #else diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 31f2117188..135c1ea96a 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -27,7 +27,8 @@ struct page_collection *page_collection_lock(tb_page_addr_t start, tb_page_addr_t end); void page_collection_unlock(struct page_collection *set); void tb_invalidate_phys_page_fast(struct page_collection *pages, - tb_page_addr_t start, int len); + tb_page_addr_t start, int len, + uintptr_t retaddr); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu); -- cgit v1.2.3 From ae57db63acf5a0399232f852acc5c1d83ef63400 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 20:24:12 -0700 Subject: cputlb: Pass retaddr to tb_check_watchpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the previous TLB_WATCHPOINT patches because we are currently failing to set cpu->mem_io_pc with the call to cpu_check_watchpoint. Pass down the retaddr directly because it's readily available. Fixes: 50b107c5d61 Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 6 +++--- accel/tcg/translate-all.h | 2 +- exec.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index db77fb221b..66d4bc4341 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2142,16 +2142,16 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #endif /* user-mode: call with mmap_lock held */ -void tb_check_watchpoint(CPUState *cpu) +void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; assert_memory_lock(); - tb = tcg_tb_lookup(cpu->mem_io_pc); + tb = tcg_tb_lookup(retaddr); if (tb) { /* We can use retranslation to find the PC. */ - cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc, true); + cpu_restore_state_from_tb(cpu, tb, retaddr, true); tb_phys_invalidate(tb, -1); } else { /* The exception probably happened in a helper. The CPU state should diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 135c1ea96a..a557b4e2bb 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -30,7 +30,7 @@ void tb_invalidate_phys_page_fast(struct page_collection *pages, tb_page_addr_t start, int len, uintptr_t retaddr); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); -void tb_check_watchpoint(CPUState *cpu); +void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr); #ifdef CONFIG_USER_ONLY int page_unprotect(target_ulong address, uintptr_t pc); diff --git a/exec.c b/exec.c index b3df826039..8a0a6613b1 100644 --- a/exec.c +++ b/exec.c @@ -2758,7 +2758,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, cpu->watchpoint_hit = wp; mmap_lock(); - tb_check_watchpoint(cpu); + tb_check_watchpoint(cpu, ra); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; mmap_unlock(); -- cgit v1.2.3