author     Emilio G. Cota <cota@braap.org>  2017-08-04 23:46:31 -0400
committer  Richard Henderson <richard.henderson@linaro.org>  2018-06-15 08:18:48 -1000
commit     0ac20318ce16f4de288969b2007ef5a654176058 (patch)
tree       ba285e1540b4d875b476acd72f364d8aaa7165d5 /include/exec
parent     705ad1ff0ce264475cb4c9a3aa31ba94a04869fe (diff)
tcg: remove tb_lock
Use mmap_lock in user-mode to protect TCG state and the page descriptors.
In !user-mode, each vCPU has its own TCG state, so no locks needed.
Per-page locks are used to protect the page descriptors.

Per-TB locks are used in both modes to protect TB jumps.

Some notes:

- tb_lock is removed from notdirty_mem_write by passing a
  locked page_collection to tb_invalidate_phys_page_fast
  (see the sketch below).

- tcg_tb_lookup/remove/insert/etc have their own internal lock(s),
  so there is no need to further serialize access to them.

- do_tb_flush is run in a safe async context, meaning no other
  vCPU threads are running. Therefore acquiring mmap_lock there
  is just to please tools such as thread sanitizer.

- Not visible in the diff, but tb_invalidate_phys_page already
  has an assert_memory_lock.

- cpu_io_recompile is !user-only, so no mmap_lock there.

- Added mmap_unlock()'s before all siglongjmp's that could
  be called in user-mode while mmap_lock is held.
  + Added an assert for !have_mmap_lock() after returning from
    the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic.

Performance numbers before/after:

Host: AMD Opteron(tm) Processor 6376

[plot: "ubuntu 17.04 ppc64 bootup+shutdown time" (100-700) vs.
Guest CPUs (1, 8, 16, 48, 64); series "before" and "tb lock removal"]
png: https://imgur.com/HwmBHXe

[plot: "debian jessie aarch64 bootup+shutdown time" (10-90) vs.
Guest CPUs (1, 8, 16, 48, 64); series "before" and "tb lock removal"]
png: https://imgur.com/iGpGFtv

The gains are high for 4-8 CPUs. Beyond that point, however, unrelated
lock contention significantly hurts scalability.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
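As a rough illustration of the first note, here is a minimal C sketch of
the reworked notdirty write path. The helper names and signatures are
modeled on the commit message, not copied from QEMU, so treat them as
assumptions:

#include <stdint.h>

typedef uintptr_t ram_addr_t;   /* stand-in for QEMU's typedef */
struct page_collection;         /* opaque set of locked page descriptors */

/* Assumed interfaces, modeled on the commit message: */
struct page_collection *page_collection_lock(ram_addr_t start, ram_addr_t end);
void page_collection_unlock(struct page_collection *pages);
void tb_invalidate_phys_page_fast(struct page_collection *pages,
                                  ram_addr_t start, int len);

/* The writer locks exactly the pages it touches, threads that lock
 * through to the TB invalidation, and unlocks when done, so there is
 * no global tb_lock to contend on. */
static void notdirty_write_sketch(ram_addr_t ram_addr, unsigned size)
{
    struct page_collection *pages =
        page_collection_lock(ram_addr, ram_addr + size);

    tb_invalidate_phys_page_fast(pages, ram_addr, size);
    /* ... perform the guest store to the now-writable page ... */
    page_collection_unlock(pages);
}

The design payoff is that vCPUs invalidating TBs on unrelated pages no
longer serialize on a single mutex.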
Diffstat (limited to 'include/exec')
-rw-r--r--  include/exec/cpu-common.h       2
-rw-r--r--  include/exec/exec-all.h         4
-rw-r--r--  include/exec/memory-internal.h  6
-rw-r--r--  include/exec/tb-context.h       2
4 files changed, 5 insertions, 9 deletions
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 0b58e262f3..18b40d6145 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -23,7 +23,7 @@ typedef struct CPUListState {
FILE *file;
} CPUListState;
-/* The CPU list lock nests outside tb_lock/tb_unlock. */
+/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
void qemu_init_cpu_list(void);
void cpu_list_lock(void);
void cpu_list_unlock(void);
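The updated comment states an ordering rule: when the CPU list lock is
held together with the mmap or page locks, it must be acquired first
and released last. A minimal sketch of that nesting in user-mode;
cpu_list_lock/unlock are declared above and mmap_lock/unlock come from
exec-all.h, while walk_pages() is a hypothetical placeholder:

void cpu_list_lock(void);
void cpu_list_unlock(void);
void mmap_lock(void);           /* user-mode only */
void mmap_unlock(void);

static void walk_pages(void) { /* hypothetical work under both locks */ }

static void nesting_sketch(void)
{
    cpu_list_lock();            /* outermost, per the comment above */
    mmap_lock();
    walk_pages();
    mmap_unlock();
    cpu_list_unlock();          /* released last */
}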
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 3c2a0efb55..25a6f28ab8 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -440,10 +440,6 @@ extern uintptr_t tci_tb_ptr;
smaller than 4 bytes, so we don't worry about special-casing this. */
#define GETPC_ADJ 2
-void tb_lock(void);
-void tb_unlock(void);
-void tb_lock_reset(void);
-
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG)
void assert_no_pages_locked(void);
#else
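The hunk is cut off after the #else. Debug hooks like this are
typically stubbed out as an inline no-op when CONFIG_DEBUG_TCG is
disabled, so callers can invoke assert_no_pages_locked()
unconditionally; a sketch of that presumed #else branch:

static inline void assert_no_pages_locked(void)
{
    /* no-op when page-lock debugging is compiled out */
}
#endif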
diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
index 56c25c0ef7..bb08fa4d2f 100644
--- a/include/exec/memory-internal.h
+++ b/include/exec/memory-internal.h
@@ -49,6 +49,8 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
struct AddressSpaceDispatch *d,
MemoryRegion *root);
+struct page_collection;
+
/* Opaque struct for passing info from memory_notdirty_write_prepare()
* to memory_notdirty_write_complete(). Callers should treat all fields
* as private, with the exception of @active.
@@ -60,10 +62,10 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
*/
typedef struct {
CPUState *cpu;
+ struct page_collection *pages;
ram_addr_t ram_addr;
vaddr mem_vaddr;
unsigned size;
- bool locked;
bool active;
} NotDirtyInfo;
@@ -91,7 +93,7 @@ typedef struct {
*
* This must only be called if we are using TCG; it will assert otherwise.
*
- * We may take a lock in the prepare call, so callers must ensure that
+ * We may take locks in the prepare call, so callers must ensure that
* they don't exit (via longjump or otherwise) without calling complete.
*
* This call must only be made inside an RCU critical section.
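Putting the documented contract together (prepare, do the write,
complete, all inside one RCU critical section, with no longjmp out in
between), a minimal caller sketch. It assumes the NotDirtyInfo, vaddr,
and ram_addr_t definitions from this header; the prepare/complete
prototypes are stand-ins whose exact QEMU signatures may differ, and
do_store() is a placeholder for the actual guest write:

typedef struct CPUState CPUState;
void rcu_read_lock(void);
void rcu_read_unlock(void);
void memory_notdirty_write_prepare(NotDirtyInfo *ndi, CPUState *cpu,
                                   vaddr mem_vaddr, ram_addr_t ram_addr,
                                   unsigned size);
void memory_notdirty_write_complete(NotDirtyInfo *ndi);

static void do_store(vaddr addr, unsigned size) { /* placeholder write */ }

/* One RCU critical section; no longjmp between prepare and complete. */
static void notdirty_store_sketch(CPUState *cpu, vaddr addr,
                                  ram_addr_t ram_addr, unsigned size)
{
    NotDirtyInfo ndi;

    rcu_read_lock();
    memory_notdirty_write_prepare(&ndi, cpu, addr, ram_addr, size);
    do_store(addr, size);
    memory_notdirty_write_complete(&ndi);
    rcu_read_unlock();
}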
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index 8c9b49c98e..feb585e0a7 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -32,8 +32,6 @@ typedef struct TBContext TBContext;
struct TBContext {
struct qht htable;
- /* any access to the tbs or the page table must use this lock */
- QemuMutex tb_lock;
/* statistics */
unsigned tb_flush_count;