Diffstat (limited to 'tcg')
-rw-r--r--   tcg/aarch64/tcg-target.inc.c |   2
-rw-r--r--   tcg/arm/tcg-target.inc.c     |   2
-rw-r--r--   tcg/i386/tcg-target.inc.c    |   7
-rw-r--r--   tcg/mips/tcg-target.inc.c    |   2
-rw-r--r--   tcg/ppc/tcg-target.inc.c     |   4
-rw-r--r--   tcg/s390/tcg-target.inc.c    |   2
-rw-r--r--   tcg/sparc/tcg-target.inc.c   |   4
-rw-r--r--   tcg/tcg.c                    | 218
-rw-r--r--   tcg/tcg.h                    |  19
-rw-r--r--   tcg/tci/tcg-target.inc.c     |   2
10 files changed, 246 insertions, 16 deletions
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index be3192078d..4562d36d1b 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -1733,7 +1733,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
         }
         tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
-        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, a0);
         break;
 
     case INDEX_op_goto_ptr:
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 56a32a470f..e1fbf465cb 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1822,7 +1822,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 tcg_out_movi32(s, COND_AL, base, ptr - dil);
             }
             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
-            s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+            set_jmp_reset_offset(s, args[0]);
         }
         break;
     case INDEX_op_goto_ptr:
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 5357909fff..e87b0d445e 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -2245,7 +2245,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                  (intptr_t)(s->tb_jmp_target_addr + a0));
         }
-        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, a0);
         break;
     case INDEX_op_goto_ptr:
         /* jmp to the given host address (could be epilogue) */
@@ -3501,7 +3501,10 @@ static void tcg_target_init(TCGContext *s)
            sure of not hitting invalid opcode.  */
         if (c & bit_OSXSAVE) {
             unsigned xcrl, xcrh;
-            asm ("xgetbv" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+            /* The xgetbv instruction is not available to older versions of
+             * the assembler, so we encode the instruction manually.
+             */
+            asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
             if ((xcrl & 6) == 6) {
                 have_avx1 = (c & bit_AVX) != 0;
                 have_avx2 = (b7 & bit_AVX2) != 0;
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index ca5f1d4894..cff525373b 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -1744,7 +1744,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
         }
         tcg_out_nop(s);
-        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, a0);
         break;
     case INDEX_op_goto_ptr:
         /* jmp to the given host address (could be epilogue) */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 86f7de5f7e..c2f729ee8f 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -2025,10 +2025,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
         tcg_out32(s, BCCTR | BO_ALWAYS);
-        s->tb_jmp_reset_offset[args[0]] = c = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, args[0]);
         if (USE_REG_TB) {
             /* For the unlinked case, need to reset TCG_REG_TB.  */
-            c = -c;
+            c = -tcg_current_code_size(s);
             assert(c == (int16_t)c);
             tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
         }
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index 9af6dcef05..17c435ade5 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -1783,7 +1783,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             /* and go there */
             tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
         }
-        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, a0);
 
         /* For the unlinked path of goto_tb, we need to reset
            TCG_REG_TB to the beginning of this TB.  */
diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index bc673bd8c6..04bdc3df5e 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -1388,12 +1388,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
             tcg_out_nop(s);
         }
-        s->tb_jmp_reset_offset[a0] = c = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, a0);
 
         /* For the unlinked path of goto_tb, we need to reset
            TCG_REG_TB to the beginning of this TB.  */
         if (USE_REG_TB) {
-            c = -c;
+            c = -tcg_current_code_size(s);
             if (check_fit_i32(c, 13)) {
                 tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
             } else {
diff --git a/tcg/tcg.c b/tcg/tcg.c
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -135,6 +135,12 @@ static TCGContext **tcg_ctxs;
 static unsigned int n_tcg_ctxs;
 TCGv_env cpu_env = 0;
 
+struct tcg_region_tree {
+    QemuMutex lock;
+    GTree *tree;
+    /* padding to avoid false sharing is computed at run-time */
+};
+
 /*
  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
  * dynamically allocate from as demand dictates. Given appropriate region
@@ -158,6 +164,13 @@ struct tcg_region_state {
 };
 static struct tcg_region_state region;
 
+/*
+ * This is an array of struct tcg_region_tree's, with padding.
+ * We use void * to simplify the computation of region_trees[i]; each
+ * struct is found every tree_size bytes.
+ */
+static void *region_trees;
+static size_t tree_size;
 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 static TCGRegSet tcg_target_call_clobber_regs;
 
@@ -293,8 +306,190 @@ TCGLabel *gen_new_label(void)
     return l;
 }
 
+static void set_jmp_reset_offset(TCGContext *s, int which)
+{
+    size_t off = tcg_current_code_size(s);
+    s->tb_jmp_reset_offset[which] = off;
+    /* Make sure that we didn't overflow the stored offset.  */
+    assert(s->tb_jmp_reset_offset[which] == off);
+}
+
 #include "tcg-target.inc.c"
 
+/* compare a pointer @ptr and a tb_tc @s */
+static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
+{
+    if (ptr >= s->ptr + s->size) {
+        return 1;
+    } else if (ptr < s->ptr) {
+        return -1;
+    }
+    return 0;
+}
+
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+{
+    const struct tb_tc *a = ap;
+    const struct tb_tc *b = bp;
+
+    /*
+     * When both sizes are set, we know this isn't a lookup.
+     * This is the most likely case: every TB must be inserted; lookups
+     * are a lot less frequent.
+     */
+    if (likely(a->size && b->size)) {
+        if (a->ptr > b->ptr) {
+            return 1;
+        } else if (a->ptr < b->ptr) {
+            return -1;
+        }
+        /* a->ptr == b->ptr should happen only on deletions */
+        g_assert(a->size == b->size);
+        return 0;
+    }
+    /*
+     * All lookups have either .size field set to 0.
+     * From the glib sources we see that @ap is always the lookup key. However
+     * the docs provide no guarantee, so we just mark this case as likely.
+     */
+    if (likely(a->size == 0)) {
+        return ptr_cmp_tb_tc(a->ptr, b);
+    }
+    return ptr_cmp_tb_tc(b->ptr, a);
+}
+
+static void tcg_region_trees_init(void)
+{
+    size_t i;
+
+    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
+    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        qemu_mutex_init(&rt->lock);
+        rt->tree = g_tree_new(tb_tc_cmp);
+    }
+}
+
+static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
+{
+    size_t region_idx;
+
+    if (p < region.start_aligned) {
+        region_idx = 0;
+    } else {
+        ptrdiff_t offset = p - region.start_aligned;
+
+        if (offset > region.stride * (region.n - 1)) {
+            region_idx = region.n - 1;
+        } else {
+            region_idx = offset / region.stride;
+        }
+    }
+    return region_trees + region_idx * tree_size;
+}
+
+void tcg_tb_insert(TranslationBlock *tb)
+{
+    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
+
+    qemu_mutex_lock(&rt->lock);
+    g_tree_insert(rt->tree, &tb->tc, tb);
+    qemu_mutex_unlock(&rt->lock);
+}
+
+void tcg_tb_remove(TranslationBlock *tb)
+{
+    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
+
+    qemu_mutex_lock(&rt->lock);
+    g_tree_remove(rt->tree, &tb->tc);
+    qemu_mutex_unlock(&rt->lock);
+}
+
+/*
+ * Find the TB 'tb' such that
+ * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
+ * Return NULL if not found.
+ */
+TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
+{
+    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
+    TranslationBlock *tb;
+    struct tb_tc s = { .ptr = (void *)tc_ptr };
+
+    qemu_mutex_lock(&rt->lock);
+    tb = g_tree_lookup(rt->tree, &s);
+    qemu_mutex_unlock(&rt->lock);
+    return tb;
+}
+
+static void tcg_region_tree_lock_all(void)
+{
+    size_t i;
+
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        qemu_mutex_lock(&rt->lock);
+    }
+}
+
+static void tcg_region_tree_unlock_all(void)
+{
+    size_t i;
+
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        qemu_mutex_unlock(&rt->lock);
+    }
+}
+
+void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
+{
+    size_t i;
+
+    tcg_region_tree_lock_all();
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        g_tree_foreach(rt->tree, func, user_data);
+    }
+    tcg_region_tree_unlock_all();
+}
+
+size_t tcg_nb_tbs(void)
+{
+    size_t nb_tbs = 0;
+    size_t i;
+
+    tcg_region_tree_lock_all();
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        nb_tbs += g_tree_nnodes(rt->tree);
+    }
+    tcg_region_tree_unlock_all();
+    return nb_tbs;
+}
+
+static void tcg_region_tree_reset_all(void)
+{
+    size_t i;
+
+    tcg_region_tree_lock_all();
+    for (i = 0; i < region.n; i++) {
+        struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+        /* Increment the refcount first so that destroy acts as a reset */
+        g_tree_ref(rt->tree);
+        g_tree_destroy(rt->tree);
+    }
+    tcg_region_tree_unlock_all();
+}
+
 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
 {
     void *start, *end;
@@ -380,6 +575,8 @@ void tcg_region_reset_all(void)
         g_assert(!err);
     }
     qemu_mutex_unlock(&region.lock);
+
+    tcg_region_tree_reset_all();
 }
 
 #ifdef CONFIG_USER_ONLY
@@ -496,6 +693,8 @@ void tcg_region_init(void)
         g_assert(!rc);
     }
 
+    tcg_region_trees_init();
+
     /* In user-mode we support only one ctx, so do the initial allocation now */
 #ifdef CONFIG_USER_ONLY
     {
@@ -600,6 +799,20 @@ size_t tcg_code_capacity(void)
     return capacity;
 }
 
+size_t tcg_tb_phys_invalidate_count(void)
+{
+    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
+    unsigned int i;
+    size_t total = 0;
+
+    for (i = 0; i < n_ctxs; i++) {
+        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
+
+        total += atomic_read(&s->tb_phys_invalidate_count);
+    }
+    return total;
+}
+
 /* pool based memory allocation */
 void *tcg_malloc_internal(TCGContext *s, int size)
 {
@@ -3327,7 +3540,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             break;
         case INDEX_op_insn_start:
             if (num_insns >= 0) {
-                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+                size_t off = tcg_current_code_size(s);
+                s->gen_insn_end_off[num_insns] = off;
+                /* Assert that we do not overflow our stored offset.  */
+                assert(s->gen_insn_end_off[num_insns] == off);
             }
             num_insns++;
             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
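A note on tb_tc_cmp above: a tb_tc with .size == 0 is never stored in the tree, so the comparator can treat any zero-sized argument as a point query against a stored [ptr, ptr + size) interval, which lets g_tree_lookup() answer "which TB contains this host PC?" directly. The following stand-alone sketch reproduces the idea against plain GLib; struct range, range_cmp and main() are illustrative stand-ins of mine, not QEMU code.

    /*
     * Illustrative sketch, not QEMU code: interval lookup in a GTree via a
     * comparator that treats zero-sized keys as point queries.
     * Build: gcc sketch.c $(pkg-config --cflags --libs glib-2.0)
     */
    #include <glib.h>
    #include <stdio.h>

    struct range {
        void *ptr;    /* base of the stored interval */
        size_t size;  /* 0 marks a lookup key, as in tb_tc_cmp */
    };

    static gint range_cmp(gconstpointer ap, gconstpointer bp)
    {
        const struct range *a = ap;
        const struct range *b = bp;

        if (a->size && b->size) {
            /* insertion/removal: total order by base pointer */
            return (a->ptr > b->ptr) - (a->ptr < b->ptr);
        }
        /* one side is a point query; compare it against the interval */
        {
            const struct range *key = a->size ? b : a;
            const struct range *ival = a->size ? a : b;
            gint sign = a->size ? -1 : 1;  /* keep cmp(a,b) == -cmp(b,a) */

            if ((char *)key->ptr >= (char *)ival->ptr + ival->size) {
                return sign;
            } else if (key->ptr < ival->ptr) {
                return -sign;
            }
            return 0;
        }
    }

    int main(void)
    {
        static char code[64];                     /* pretend host code buffer */
        struct range r = { .ptr = code, .size = sizeof(code) };
        struct range key = { .ptr = code + 10 };  /* any address inside */
        GTree *tree = g_tree_new(range_cmp);

        g_tree_insert(tree, &r, (gpointer)"block A");
        printf("%s\n", (char *)g_tree_lookup(tree, &key)); /* prints: block A */
        g_tree_destroy(tree);
        return 0;
    }

The sign flip keeps the comparator antisymmetric no matter which side GLib passes the lookup key on, which is exactly the documentation gap the patch's comment hedges against.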
diff --git a/tcg/tcg.h b/tcg/tcg.h
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -695,6 +695,8 @@ struct TCGContext {
     /* Threshold to flush the translated code buffer.  */
     void *code_gen_highwater;
 
+    size_t tb_phys_invalidate_count;
+
     /* Track which vCPU triggers events */
     CPUState *cpu;                      /* *_trans */
 
@@ -848,14 +850,16 @@ static inline bool tcg_op_buf_full(void)
     /* This is not a hard limit, it merely stops translation when
      * we have produced "enough" opcodes.  We want to limit TB size
      * such that a RISC host can reasonably use a 16-bit signed
-     * branch within the TB.
+     * branch within the TB.  We also need to be mindful of the
+     * 16-bit unsigned offsets, TranslationBlock.jmp_reset_offset[]
+     * and TCGContext.gen_insn_end_off[].
      */
-    return tcg_ctx->nb_ops >= 8000;
+    return tcg_ctx->nb_ops >= 4000;
 }
 
 /* pool based memory allocation */
 
-/* user-mode: tb_lock must be held for tcg_malloc_internal. */
+/* user-mode: mmap_lock must be held for tcg_malloc_internal. */
 void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 TranslationBlock *tcg_tb_alloc(TCGContext *s);
@@ -866,7 +870,14 @@ void tcg_region_reset_all(void);
 size_t tcg_code_size(void);
 size_t tcg_code_capacity(void);
 
-/* user-mode: Called with tb_lock held. */
+void tcg_tb_insert(TranslationBlock *tb);
+void tcg_tb_remove(TranslationBlock *tb);
+size_t tcg_tb_phys_invalidate_count(void);
+TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
+void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
+size_t tcg_nb_tbs(void);
+
+/* user-mode: Called with mmap_lock held. */
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = tcg_ctx;
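The tcg.h hunk above halves the opcode budget for the same reason the new asserts exist: TranslationBlock.jmp_reset_offset[] and TCGContext.gen_insn_end_off[] are 16-bit unsigned fields, so every recorded code offset must survive narrowing to uint16_t. Here is a minimal sketch of that guard idiom with made-up names; the ~16-bytes-per-op worst case is my assumption for illustration, not a figure from the patch.

    /*
     * Illustrative sketch, not QEMU code: guard a narrowing store into a
     * 16-bit offset slot, mirroring set_jmp_reset_offset() above.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    static void store_offset16(uint16_t *slot, size_t off)
    {
        *slot = off;           /* implicit narrowing to 16 bits */
        assert(*slot == off);  /* fires iff off does not fit in uint16_t */
    }

    int main(void)
    {
        uint16_t gen_insn_end_off[2];  /* stand-in for the real array */

        /* 4000 ops at an assumed worst case of ~16 host bytes per op:
         * 64000 still fits; an 8000-op TB could not guarantee this. */
        store_offset16(&gen_insn_end_off[0], 4000 * 16);
        printf("stored offset: %u\n", gen_insn_end_off[0]);
        /* store_offset16(&gen_insn_end_off[1], 8000 * 16);  would abort */
        return 0;
    }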
diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c
index cc949bea85..62ed097254 100644
--- a/tcg/tci/tcg-target.inc.c
+++ b/tcg/tci/tcg-target.inc.c
@@ -574,7 +574,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             /* Indirect jump method. */
             TODO();
         }
-        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+        set_jmp_reset_offset(s, args[0]);
         break;
     case INDEX_op_br:
         tci_out_label(s, arg_label(args[0]));
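Lastly, the run-time padding of region_trees in tcg/tcg.c deserves a closer look: instead of a compile-time pad member, each element's size is rounded up to the data cache line size at init time, so the per-region locks of adjacent trees never share a line. Below is a rough stand-alone sketch of the same layout; DCACHE_LINESIZE, trees_init and the pthread/C11 calls are my stand-ins for qemu_dcache_linesize and qemu_memalign, and the 64-byte line size is an assumption.

    /*
     * Illustrative sketch, not QEMU code: a flat array whose element size
     * is rounded up to the cache line, so per-element locks never share one.
     */
    #include <stdlib.h>
    #include <pthread.h>

    #define DCACHE_LINESIZE 64  /* assumption; QEMU detects this at run time */
    #define ROUND_UP(n, d)  ((((n) + (d) - 1) / (d)) * (d))

    struct padded_tree {
        pthread_mutex_t lock;
        void *tree;
        /* no pad member: the allocation below provides the padding */
    };

    static void *trees;       /* void * eases the address arithmetic */
    static size_t elem_size;  /* sizeof(struct padded_tree), line-rounded */

    static int trees_init(size_t n)
    {
        elem_size = ROUND_UP(sizeof(struct padded_tree), DCACHE_LINESIZE);
        /* aligned_alloc requires size to be a multiple of the alignment */
        trees = aligned_alloc(DCACHE_LINESIZE, n * elem_size);
        if (!trees) {
            return -1;
        }
        for (size_t i = 0; i < n; i++) {
            struct padded_tree *pt = (void *)((char *)trees + i * elem_size);

            pthread_mutex_init(&pt->lock, NULL);
            pt->tree = NULL;
        }
        return 0;
    }

    int main(void)
    {
        return trees_init(8);
    }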