aboutsummaryrefslogtreecommitdiff
path: root/exec.c
diff options
context:
space:
mode:
authorEmilio G. Cota <cota@braap.org>2017-08-04 23:46:31 -0400
committerRichard Henderson <richard.henderson@linaro.org>2018-06-15 08:18:48 -1000
commit0ac20318ce16f4de288969b2007ef5a654176058 (patch)
treeba285e1540b4d875b476acd72f364d8aaa7165d5 /exec.c
parent705ad1ff0ce264475cb4c9a3aa31ba94a04869fe (diff)
tcg: remove tb_lock
Use mmap_lock in user-mode to protect TCG state and the page descriptors. In !user-mode, each vCPU has its own TCG state, so no locks needed. Per-page locks are used to protect the page descriptors. Per-TB locks are used in both modes to protect TB jumps. Some notes: - tb_lock is removed from notdirty_mem_write by passing a locked page_collection to tb_invalidate_phys_page_fast. - tcg_tb_lookup/remove/insert/etc have their own internal lock(s), so there is no need to further serialize access to them. - do_tb_flush is run in a safe async context, meaning no other vCPU threads are running. Therefore acquiring mmap_lock there is just to please tools such as thread sanitizer. - Not visible in the diff, but tb_invalidate_phys_page already has an assert_memory_lock. - cpu_io_recompile is !user-only, so no mmap_lock there. - Added mmap_unlock()'s before all siglongjmp's that could be called in user-mode while mmap_lock is held. + Added an assert for !have_mmap_lock() after returning from the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic. Performance numbers before/after: Host: AMD Opteron(tm) Processor 6376 ubuntu 17.04 ppc64 bootup+shutdown time 700 +-+--+----+------+------------+-----------+------------*--+-+ | + + + + + *B | | before ***B*** ** * | |tb lock removal ###D### *** | 600 +-+ *** +-+ | ** # | | *B* #D | | *** * ## | 500 +-+ *** ### +-+ | * *** ### | | *B* # ## | | ** * #D# | 400 +-+ ** ## +-+ | ** ### | | ** ## | | ** # ## | 300 +-+ * B* #D# +-+ | B *** ### | | * ** #### | | * *** ### | 200 +-+ B *B #D# +-+ | #B* * ## # | | #* ## | | + D##D# + + + + | 100 +-+--+----+------+------------+-----------+------------+--+-+ 1 8 16 Guest CPUs 48 64 png: https://imgur.com/HwmBHXe debian jessie aarch64 bootup+shutdown time 90 +-+--+-----+-----+------------+------------+------------+--+-+ | + + + + + + | | before ***B*** B | 80 +tb lock removal ###D### **D +-+ | **### | | **## | 70 +-+ ** # +-+ | ** ## | | ** # | 60 +-+ *B ## +-+ | ** ## | | *** #D | 50 +-+ *** ## +-+ | * ** ### | | **B* ### | 40 +-+ **** # ## +-+ | **** #D# | | ***B** ### | 30 +-+ B***B** #### +-+ | B * * # ### | | B ###D# | 20 +-+ D ##D## +-+ | D# | | + + + + + + | 10 +-+--+-----+-----+------------+------------+------------+--+-+ 1 8 16 Guest CPUs 48 64 png: https://imgur.com/iGpGFtv The gains are high for 4-8 CPUs. Beyond that point, however, unrelated lock contention significantly hurts scalability. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'exec.c')
-rw-r--r--exec.c26
1 files changed, 11 insertions, 15 deletions
diff --git a/exec.c b/exec.c
index ebadc0e302..28f9bdcbf9 100644
--- a/exec.c
+++ b/exec.c
@@ -1031,9 +1031,7 @@ const char *parse_cpu_model(const char *cpu_model)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
mmap_lock();
- tb_lock();
tb_invalidate_phys_page_range(pc, pc + 1, 0);
- tb_unlock();
mmap_unlock();
}
#else
@@ -2644,21 +2642,21 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
ndi->ram_addr = ram_addr;
ndi->mem_vaddr = mem_vaddr;
ndi->size = size;
- ndi->locked = false;
+ ndi->pages = NULL;
assert(tcg_enabled());
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
- ndi->locked = true;
- tb_lock();
- tb_invalidate_phys_page_fast(ram_addr, size);
+ ndi->pages = page_collection_lock(ram_addr, ram_addr + size);
+ tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size);
}
}
/* Called within RCU critical section. */
void memory_notdirty_write_complete(NotDirtyInfo *ndi)
{
- if (ndi->locked) {
- tb_unlock();
+ if (ndi->pages) {
+ page_collection_unlock(ndi->pages);
+ ndi->pages = NULL;
}
/* Set both VGA and migration bits for simplicity and to remove
@@ -2745,18 +2743,16 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
cpu->watchpoint_hit = wp;
- /* Both tb_lock and iothread_mutex will be reset when
- * cpu_loop_exit or cpu_loop_exit_noexc longjmp
- * back into the cpu_exec main loop.
- */
- tb_lock();
+ mmap_lock();
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
+ mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
+ mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
}
@@ -3147,9 +3143,9 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
}
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
assert(tcg_enabled());
- tb_lock();
+ mmap_lock();
tb_invalidate_phys_range(addr, addr + length);
- tb_unlock();
+ mmap_unlock();
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);