From 383beda9cf32f795616c3b93f7d6154d70372d4b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Oct 2018 13:51:25 -0400 Subject: tcg: Add tlb_index and tlb_entry helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Isolate the computation of an index from an address into a helper before we change that function. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson [ cota: convert tlb_vaddr_to_host; use atomic_read on addr_write ] Signed-off-by: Emilio G. Cota Message-Id: <20181009175129.17888-2-cota@braap.org> --- include/exec/cpu_ldst.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/exec/cpu_ldst.h') diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 41ed0526e2..f54d91ff68 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -126,6 +126,20 @@ extern __thread uintptr_t helper_retaddr; /* The memory helpers for tcg-generated code need tcg_target_long etc. */ #include "tcg.h" +/* Find the TLB index corresponding to the mmu_idx + address pair. */ +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); +} + +/* Find the TLB entry corresponding to the mmu_idx + address pair. */ +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; +} + #ifdef MMU_MODE0_SUFFIX #define CPU_MMU_INDEX 0 #define MEMSUFFIX MMU_MODE0_SUFFIX @@ -416,8 +430,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, #if defined(CONFIG_USER_ONLY) return g2h(addr); #else - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; + CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr); abi_ptr tlb_addr; uintptr_t haddr; @@ -445,7 +458,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, return NULL; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + tlbentry->addend; return (void *)haddr; #endif /* defined(CONFIG_USER_ONLY) */ } -- cgit v1.2.3 From 403f290c0603f35f2d09c982bf5549b6d0803ec1 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Tue, 16 Oct 2018 11:38:40 -0400 Subject: cputlb: read CPUTLBEntry.addr_write atomically Updates can come from other threads, so readers that do not take tlb_lock must use atomic_read to avoid undefined behaviour (UB). This completes the conversion to tlb_lock. This conversion results on average in no performance loss, as the following experiments (run on an Intel i7-6700K CPU @ 4.00GHz) show. 1. aarch64 bootup+shutdown test: - Before: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) 7.504481349 seconds time elapsed ( +- 0.14% ) - After: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) 7.474970463 seconds time elapsed ( +- 0.07% ) 2. SPEC06int: SPEC06int (test set) [Y axis: Speedup over master] 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | +++ | +++ tlb-lock-v3 (spinl|ck) | | +++ | | +++ +++ | | | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean png: https://imgur.com/a/BHzpPTW Notes: - tlb-lock-v2 corresponds to an implementation with a mutex. - tlb-lock-v3 corresponds to the current implementation, i.e. a spinlock and a single lock acquisition in tlb_set_page_with_attrs. Signed-off-by: Emilio G. Cota Message-Id: <20181016153840.25877-1-cota@braap.org> Signed-off-by: Richard Henderson --- include/exec/cpu_ldst.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/exec/cpu_ldst.h') diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index f54d91ff68..959068495a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -126,6 +126,15 @@ extern __thread uintptr_t helper_retaddr; /* The memory helpers for tcg-generated code need tcg_target_long etc. */ #include "tcg.h" +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +{ +#if TCG_OVERSIZED_GUEST + return entry->addr_write; +#else + return atomic_read(&entry->addr_write); +#endif +} + /* Find the TLB index corresponding to the mmu_idx + address pair. */ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) @@ -439,7 +448,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tlb_addr = tlbentry->addr_read; break; case 1: - tlb_addr = tlbentry->addr_write; + tlb_addr = tlb_addr_write(tlbentry); break; case 2: tlb_addr = tlbentry->addr_code; -- cgit v1.2.3