Diffstat (limited to 'accel/tcg/translate-all.c')
-rw-r--r-- | accel/tcg/translate-all.c | 533
1 file changed, 243 insertions(+), 290 deletions(-)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 1b43deb0cd..34c5e28d07 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -153,7 +153,9 @@ static int v_l2_levels; static void *l1_map[V_L1_MAX_SIZE]; /* code generation context */ -TCGContext tcg_ctx; +TCGContext tcg_init_ctx; +__thread TCGContext *tcg_ctx; +TBContext tb_ctx; bool parallel_cpus; /* translation block context */ @@ -185,7 +187,7 @@ static void page_table_config_init(void) void tb_lock(void) { assert_tb_unlocked(); - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_lock(&tb_ctx.tb_lock); have_tb_lock++; } @@ -193,13 +195,13 @@ void tb_unlock(void) { assert_tb_locked(); have_tb_lock--; - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tb_ctx.tb_lock); } void tb_lock_reset(void) { if (have_tb_lock) { - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tb_ctx.tb_lock); have_tb_lock = 0; } } @@ -208,7 +210,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); void cpu_gen_init(void) { - tcg_context_init(&tcg_ctx); + tcg_context_init(&tcg_init_ctx); } /* Encode VAL as a signed leb128 sequence at P. @@ -266,12 +268,10 @@ static target_long decode_sleb128(uint8_t **pp) static int encode_search(TranslationBlock *tb, uint8_t *block) { - uint8_t *highwater = tcg_ctx.code_gen_highwater; + uint8_t *highwater = tcg_ctx->code_gen_highwater; uint8_t *p = block; int i, j, n; - tb->tc.search = block; - for (i = 0, n = tb->icount; i < n; ++i) { target_ulong prev; @@ -279,12 +279,12 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) if (i == 0) { prev = (j == 0 ? tb->pc : 0); } else { - prev = tcg_ctx.gen_insn_data[i - 1][j]; + prev = tcg_ctx->gen_insn_data[i - 1][j]; } - p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev); + p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); } - prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]); - p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev); + prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); + p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); /* Test for (pending) buffer overflow. 
The assumption is that any one row beginning below the high water mark cannot overrun @@ -307,9 +307,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; - uint8_t *p = tb->tc.search; + uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; #ifdef CONFIG_PROFILER + TCGProfile *prof = &tcg_ctx->prof; int64_t ti = profile_getclock(); #endif @@ -344,8 +345,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, restore_state_to_opc(env, tb, data); #ifdef CONFIG_PROFILER - tcg_ctx.restore_time += profile_getclock() - ti; - tcg_ctx.restore_count++; + atomic_set(&prof->restore_time, + prof->restore_time + profile_getclock() - ti); + atomic_set(&prof->restore_count, prof->restore_count + 1); #endif return 0; } @@ -375,7 +377,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) if (tb->cflags & CF_NOCACHE) { /* one-shot translation, invalidate it immediately */ tb_phys_invalidate(tb, -1); - tb_free(tb); + tb_remove(tb); } r = true; } @@ -591,7 +593,7 @@ static inline void *split_cross_256mb(void *buf1, size_t size1) buf1 = buf2; } - tcg_ctx.code_gen_buffer_size = size1; + tcg_ctx->code_gen_buffer_size = size1; return buf1; } #endif @@ -600,75 +602,35 @@ static inline void *split_cross_256mb(void *buf1, size_t size1) static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned(CODE_GEN_ALIGN))); -# ifdef _WIN32 -static inline void do_protect(void *addr, long size, int prot) -{ - DWORD old_protect; - VirtualProtect(addr, size, prot, &old_protect); -} - -static inline void map_exec(void *addr, long size) -{ - do_protect(addr, size, PAGE_EXECUTE_READWRITE); -} - -static inline void map_none(void *addr, long size) -{ - do_protect(addr, size, PAGE_NOACCESS); -} -# else -static inline void do_protect(void *addr, long size, int prot) -{ - uintptr_t start, end; - - start = (uintptr_t)addr; - start &= qemu_real_host_page_mask; - - end = (uintptr_t)addr + size; - end = ROUND_UP(end, qemu_real_host_page_size); - - mprotect((void *)start, end - start, prot); -} - -static inline void map_exec(void *addr, long size) -{ - do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC); -} - -static inline void map_none(void *addr, long size) -{ - do_protect(addr, size, PROT_NONE); -} -# endif /* WIN32 */ - static inline void *alloc_code_gen_buffer(void) { void *buf = static_code_gen_buffer; - size_t full_size, size; + void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer); + size_t size; - /* The size of the buffer, rounded down to end on a page boundary. */ - full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer)) - & qemu_real_host_page_mask) - (uintptr_t)buf; + /* page-align the beginning and end of the buffer */ + buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); + end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); - /* Reserve a guard page. */ - size = full_size - qemu_real_host_page_size; + size = end - buf; /* Honor a command-line option limiting the size of the buffer. 
*/ - if (size > tcg_ctx.code_gen_buffer_size) { - size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size) - & qemu_real_host_page_mask) - (uintptr_t)buf; + if (size > tcg_ctx->code_gen_buffer_size) { + size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size, + qemu_real_host_page_size); } - tcg_ctx.code_gen_buffer_size = size; + tcg_ctx->code_gen_buffer_size = size; #ifdef __mips__ if (cross_256mb(buf, size)) { buf = split_cross_256mb(buf, size); - size = tcg_ctx.code_gen_buffer_size; + size = tcg_ctx->code_gen_buffer_size; } #endif - map_exec(buf, size); - map_none(buf + size, qemu_real_host_page_size); + if (qemu_mprotect_rwx(buf, size)) { + abort(); + } qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); return buf; @@ -676,26 +638,20 @@ static inline void *alloc_code_gen_buffer(void) #elif defined(_WIN32) static inline void *alloc_code_gen_buffer(void) { - size_t size = tcg_ctx.code_gen_buffer_size; - void *buf1, *buf2; - - /* Perform the allocation in two steps, so that the guard page - is reserved but uncommitted. */ - buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size, - MEM_RESERVE, PAGE_NOACCESS); - if (buf1 != NULL) { - buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); - assert(buf1 == buf2); - } + size_t size = tcg_ctx->code_gen_buffer_size; + void *buf; - return buf1; + buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + return buf; } #else static inline void *alloc_code_gen_buffer(void) { + int prot = PROT_WRITE | PROT_READ | PROT_EXEC; int flags = MAP_PRIVATE | MAP_ANONYMOUS; uintptr_t start = 0; - size_t size = tcg_ctx.code_gen_buffer_size; + size_t size = tcg_ctx->code_gen_buffer_size; void *buf; /* Constrain the position of the buffer based on the host cpu. @@ -712,7 +668,7 @@ static inline void *alloc_code_gen_buffer(void) flags |= MAP_32BIT; /* Cannot expect to map more than 800MB in low memory. */ if (size > 800u * 1024 * 1024) { - tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024; + tcg_ctx->code_gen_buffer_size = size = 800u * 1024 * 1024; } # elif defined(__sparc__) start = 0x40000000ul; @@ -726,8 +682,7 @@ static inline void *alloc_code_gen_buffer(void) # endif # endif - buf = mmap((void *)start, size + qemu_real_host_page_size, - PROT_NONE, flags, -1, 0); + buf = mmap((void *)start, size, prot, flags, -1, 0); if (buf == MAP_FAILED) { return NULL; } @@ -737,24 +692,23 @@ static inline void *alloc_code_gen_buffer(void) /* Try again, with the original still mapped, to avoid re-acquiring that 256mb crossing. This time don't specify an address. */ size_t size2; - void *buf2 = mmap(NULL, size + qemu_real_host_page_size, - PROT_NONE, flags, -1, 0); + void *buf2 = mmap(NULL, size, prot, flags, -1, 0); switch ((int)(buf2 != MAP_FAILED)) { case 1: if (!cross_256mb(buf2, size)) { /* Success! Use the new buffer. */ - munmap(buf, size + qemu_real_host_page_size); + munmap(buf, size); break; } /* Failure. Work with what we had. */ - munmap(buf2, size + qemu_real_host_page_size); + munmap(buf2, size); /* fallthru */ default: /* Split the original buffer. Free the smaller half. */ buf2 = split_cross_256mb(buf, size); - size2 = tcg_ctx.code_gen_buffer_size; + size2 = tcg_ctx->code_gen_buffer_size; if (buf == buf2) { - munmap(buf + size2 + qemu_real_host_page_size, size - size2); + munmap(buf + size2, size - size2); } else { munmap(buf, size - size2); } @@ -765,10 +719,6 @@ static inline void *alloc_code_gen_buffer(void) } #endif - /* Make the final buffer accessible. 
The guard page at the end - will remain inaccessible with PROT_NONE. */ - mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC); - /* Request large pages for the buffer. */ qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); @@ -776,31 +726,65 @@ static inline void *alloc_code_gen_buffer(void) } #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ -static inline void code_gen_alloc(size_t tb_size) +/* compare a pointer @ptr and a tb_tc @s */ +static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) { - tcg_ctx.code_gen_buffer_size = size_code_gen_buffer(tb_size); - tcg_ctx.code_gen_buffer = alloc_code_gen_buffer(); - if (tcg_ctx.code_gen_buffer == NULL) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); + if (ptr >= s->ptr + s->size) { + return 1; + } else if (ptr < s->ptr) { + return -1; } + return 0; +} + +static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp) +{ + const struct tb_tc *a = ap; + const struct tb_tc *b = bp; - /* size this conservatively -- realloc later if needed */ - tcg_ctx.tb_ctx.tbs_size = - tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8; - if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) { - tcg_ctx.tb_ctx.tbs_size = 64 * 1024; + /* + * When both sizes are set, we know this isn't a lookup. + * This is the most likely case: every TB must be inserted; lookups + * are a lot less frequent. + */ + if (likely(a->size && b->size)) { + if (a->ptr > b->ptr) { + return 1; + } else if (a->ptr < b->ptr) { + return -1; + } + /* a->ptr == b->ptr should happen only on deletions */ + g_assert(a->size == b->size); + return 0; + } + /* + * All lookups have either .size field set to 0. + * From the glib sources we see that @ap is always the lookup key. However + * the docs provide no guarantee, so we just mark this case as likely. + */ + if (likely(a->size == 0)) { + return ptr_cmp_tb_tc(a->ptr, b); } - tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size); + return ptr_cmp_tb_tc(b->ptr, a); +} - qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); +static inline void code_gen_alloc(size_t tb_size) +{ + tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size); + tcg_ctx->code_gen_buffer = alloc_code_gen_buffer(); + if (tcg_ctx->code_gen_buffer == NULL) { + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); + exit(1); + } + tb_ctx.tb_tree = g_tree_new(tb_tc_cmp); + qemu_mutex_init(&tb_ctx.tb_lock); } static void tb_htable_init(void) { unsigned int mode = QHT_MODE_AUTO_RESIZE; - qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode); + qht_init(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE, mode); } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -816,7 +800,7 @@ void tcg_exec_init(unsigned long tb_size) #if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and initialize the prologue now. */ - tcg_prologue_init(&tcg_ctx); + tcg_prologue_init(tcg_ctx); #endif } @@ -829,38 +813,22 @@ void tcg_exec_init(unsigned long tb_size) static TranslationBlock *tb_alloc(target_ulong pc) { TranslationBlock *tb; - TBContext *ctx; assert_tb_locked(); - tb = tcg_tb_alloc(&tcg_ctx); + tb = tcg_tb_alloc(tcg_ctx); if (unlikely(tb == NULL)) { return NULL; } - ctx = &tcg_ctx.tb_ctx; - if (unlikely(ctx->nb_tbs == ctx->tbs_size)) { - ctx->tbs_size *= 2; - ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size); - } - ctx->tbs[ctx->nb_tbs++] = tb; return tb; } /* Called with tb_lock held. 
*/ -void tb_free(TranslationBlock *tb) +void tb_remove(TranslationBlock *tb) { assert_tb_locked(); - /* In practice this is mostly used for single use temporary TB - Ignore the hard cases and just back up if this TB happens to - be the last one generated. */ - if (tcg_ctx.tb_ctx.nb_tbs > 0 && - tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) { - size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize); - - tcg_ctx.code_gen_ptr = tb->tc.ptr - struct_size; - tcg_ctx.tb_ctx.nb_tbs--; - } + g_tree_remove(tb_ctx.tb_tree, &tb->tc); } static inline void invalidate_page_bitmap(PageDesc *p) @@ -905,6 +873,15 @@ static void page_flush_tb(void) } } +static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data) +{ + const TranslationBlock *tb = value; + size_t *size = data; + + *size += tb->tc.size; + return false; +} + /* flush all the translation blocks */ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) { @@ -913,35 +890,34 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) /* If it is already been done on request of another CPU, * just retry. */ - if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_count.host_int) { + if (tb_ctx.tb_flush_count != tb_flush_count.host_int) { goto done; } if (DEBUG_TB_FLUSH_GATE) { - printf("qemu: flush code_size=%td nb_tbs=%d avg_tb_size=%td\n", - tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? - (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) / - tcg_ctx.tb_ctx.nb_tbs : 0); - } - if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) - > tcg_ctx.code_gen_buffer_size) { - cpu_abort(cpu, "Internal error: code buffer overflow\n"); + size_t nb_tbs = g_tree_nnodes(tb_ctx.tb_tree); + size_t host_size = 0; + + g_tree_foreach(tb_ctx.tb_tree, tb_host_size_iter, &host_size); + printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n", + tcg_code_size(), nb_tbs, nb_tbs > 0 ? 
host_size / nb_tbs : 0); } CPU_FOREACH(cpu) { cpu_tb_jmp_cache_clear(cpu); } - tcg_ctx.tb_ctx.nb_tbs = 0; - qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); + /* Increment the refcount first so that destroy acts as a reset */ + g_tree_ref(tb_ctx.tb_tree); + g_tree_destroy(tb_ctx.tb_tree); + + qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); - tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; + tcg_region_reset_all(); /* XXX: flush processor icache at this point if cache flush is expensive */ - atomic_mb_set(&tcg_ctx.tb_ctx.tb_flush_count, - tcg_ctx.tb_ctx.tb_flush_count + 1); + atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1); done: tb_unlock(); @@ -950,7 +926,7 @@ done: void tb_flush(CPUState *cpu) { if (tcg_enabled()) { - unsigned tb_flush_count = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count); + unsigned tb_flush_count = atomic_mb_read(&tb_ctx.tb_flush_count); async_safe_run_on_cpu(cpu, do_tb_flush, RUN_ON_CPU_HOST_INT(tb_flush_count)); } @@ -983,7 +959,7 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) static void tb_invalidate_check(target_ulong address) { address &= TARGET_PAGE_MASK; - qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_invalidate_check, &address); + qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address); } static void @@ -1003,7 +979,7 @@ do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp) /* verify that all the pages have correct rights for code */ static void tb_page_check(void) { - qht_iter(&tcg_ctx.tb_ctx.htable, do_tb_page_check, NULL); + qht_iter(&tb_ctx.htable, do_tb_page_check, NULL); } #endif /* CONFIG_USER_ONLY */ @@ -1101,8 +1077,11 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate); - qht_remove(&tcg_ctx.tb_ctx.htable, tb, h); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + tb->trace_vcpu_dstate); + if (!qht_remove(&tb_ctx.htable, tb, h)) { + return; + } /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -1131,7 +1110,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* suppress any remaining jumps to this TB */ tb_jmp_unlink(tb); - tcg_ctx.tb_ctx.tb_phys_invalidate_count++; + tb_ctx.tb_phys_invalidate_count++; } #ifdef CONFIG_SOFTMMU @@ -1245,8 +1224,9 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate); - qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + tb->trace_vcpu_dstate); + qht_insert(&tb_ctx.htable, tb, h); #ifdef CONFIG_USER_ONLY if (DEBUG_TB_CHECK_GATE) { @@ -1267,18 +1247,16 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_insn_unit *gen_code_buf; int gen_code_size, search_size; #ifdef CONFIG_PROFILER + TCGProfile *prof = &tcg_ctx->prof; int64_t ti; #endif assert_memory_lock(); phys_pc = get_page_addr_code(env, pc); - if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) { - cflags |= CF_USE_ICOUNT; - } + buffer_overflow: tb = tb_alloc(pc); if (unlikely(!tb)) { - buffer_overflow: /* flush must be done */ tb_flush(cpu); mmap_unlock(); @@ -1287,43 +1265,44 @@ TranslationBlock *tb_gen_code(CPUState *cpu, cpu_loop_exit(cpu); } - gen_code_buf = tcg_ctx.code_gen_ptr; + gen_code_buf = tcg_ctx->code_gen_ptr; 
tb->tc.ptr = gen_code_buf; tb->pc = pc; tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; + tcg_ctx->tb_cflags = cflags; #ifdef CONFIG_PROFILER - tcg_ctx.tb_count1++; /* includes aborted translations because of - exceptions */ + /* includes aborted translations because of exceptions */ + atomic_set(&prof->tb_count1, prof->tb_count1 + 1); ti = profile_getclock(); #endif - tcg_func_start(&tcg_ctx); + tcg_func_start(tcg_ctx); - tcg_ctx.cpu = ENV_GET_CPU(env); + tcg_ctx->cpu = ENV_GET_CPU(env); gen_intermediate_code(cpu, tb); - tcg_ctx.cpu = NULL; + tcg_ctx->cpu = NULL; trace_translate_block(tb, tb->pc, tb->tc.ptr); /* generate machine code */ tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID; - tcg_ctx.tb_jmp_reset_offset = tb->jmp_reset_offset; + tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; if (TCG_TARGET_HAS_direct_jump) { - tcg_ctx.tb_jmp_insn_offset = tb->jmp_target_arg; - tcg_ctx.tb_jmp_target_addr = NULL; + tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; + tcg_ctx->tb_jmp_target_addr = NULL; } else { - tcg_ctx.tb_jmp_insn_offset = NULL; - tcg_ctx.tb_jmp_target_addr = tb->jmp_target_arg; + tcg_ctx->tb_jmp_insn_offset = NULL; + tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; } #ifdef CONFIG_PROFILER - tcg_ctx.tb_count++; - tcg_ctx.interm_time += profile_getclock() - ti; + atomic_set(&prof->tb_count, prof->tb_count + 1); + atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti); ti = profile_getclock(); #endif @@ -1332,7 +1311,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, the tcg optimization currently hidden inside tcg_gen_code. All that should be required is to flush the TBs, allocate a new TB, re-initialize it per above, and re-do the actual code generation. 
*/ - gen_code_size = tcg_gen_code(&tcg_ctx, tb); + gen_code_size = tcg_gen_code(tcg_ctx, tb); if (unlikely(gen_code_size < 0)) { goto buffer_overflow; } @@ -1340,12 +1319,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu, if (unlikely(search_size < 0)) { goto buffer_overflow; } + tb->tc.size = gen_code_size; #ifdef CONFIG_PROFILER - tcg_ctx.code_time += profile_getclock() - ti; - tcg_ctx.code_in_len += tb->size; - tcg_ctx.code_out_len += gen_code_size; - tcg_ctx.search_out_len += search_size; + atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); + atomic_set(&prof->code_in_len, prof->code_in_len + tb->size); + atomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); + atomic_set(&prof->search_out_len, prof->search_out_len + search_size); #endif #ifdef DEBUG_DISAS @@ -1353,8 +1333,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, qemu_log_in_addr_range(tb->pc)) { qemu_log_lock(); qemu_log("OUT: [size=%d]\n", gen_code_size); - if (tcg_ctx.data_gen_ptr) { - size_t code_size = tcg_ctx.data_gen_ptr - tb->tc.ptr; + if (tcg_ctx->data_gen_ptr) { + size_t code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; size_t data_size = gen_code_size - code_size; size_t i; @@ -1363,12 +1343,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu, for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { if (sizeof(tcg_target_ulong) == 8) { qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", - (uintptr_t)tcg_ctx.data_gen_ptr + i, - *(uint64_t *)(tcg_ctx.data_gen_ptr + i)); + (uintptr_t)tcg_ctx->data_gen_ptr + i, + *(uint64_t *)(tcg_ctx->data_gen_ptr + i)); } else { qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", - (uintptr_t)tcg_ctx.data_gen_ptr + i, - *(uint32_t *)(tcg_ctx.data_gen_ptr + i)); + (uintptr_t)tcg_ctx->data_gen_ptr + i, + *(uint32_t *)(tcg_ctx->data_gen_ptr + i)); } } } else { @@ -1380,9 +1360,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #endif - tcg_ctx.code_gen_ptr = (void *) + atomic_set(&tcg_ctx->code_gen_ptr, (void *) ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, - CODE_GEN_ALIGN); + CODE_GEN_ALIGN)); /* init jump list */ assert(((uintptr_t)tb & 3) == 0); @@ -1410,6 +1390,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, * through the physical hash table and physical page list. */ tb_link_page(tb, phys_pc, phys_page2); + g_tree_insert(tb_ctx.tb_tree, &tb->tc, tb); return tb; } @@ -1461,14 +1442,12 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access) { TranslationBlock *tb, *tb_next; -#if defined(TARGET_HAS_PRECISE_SMC) - CPUState *cpu = current_cpu; - CPUArchState *env = NULL; -#endif tb_page_addr_t tb_start, tb_end; PageDesc *p; int n; #ifdef TARGET_HAS_PRECISE_SMC + CPUState *cpu = current_cpu; + CPUArchState *env = NULL; int current_tb_not_found = is_cpu_write_access; TranslationBlock *current_tb = NULL; int current_tb_modified = 0; @@ -1545,10 +1524,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #endif #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); + /* Force execution of one insn next time. 
*/ + cpu->cflags_next_tb = 1 | curr_cflags(); cpu_loop_exit_noexc(cpu); } #endif @@ -1663,10 +1640,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) p->first_tb = NULL; #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); + /* Force execution of one insn next time. */ + cpu->cflags_next_tb = 1 | curr_cflags(); /* tb_lock will be reset after cpu_loop_exit_noexc longjmps * back into the cpu_exec loop. */ return true; @@ -1678,37 +1653,16 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) } #endif -/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < - tb[1].tc_ptr. Return NULL if not found */ +/* + * Find the TB 'tb' such that + * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size + * Return NULL if not found. + */ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) { - int m_min, m_max, m; - uintptr_t v; - TranslationBlock *tb; + struct tb_tc s = { .ptr = (void *)tc_ptr }; - if (tcg_ctx.tb_ctx.nb_tbs <= 0) { - return NULL; - } - if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer || - tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) { - return NULL; - } - /* binary search (cf Knuth) */ - m_min = 0; - m_max = tcg_ctx.tb_ctx.nb_tbs - 1; - while (m_min <= m_max) { - m = (m_min + m_max) >> 1; - tb = tcg_ctx.tb_ctx.tbs[m]; - v = (uintptr_t)tb->tc.ptr; - if (v == tc_ptr) { - return tb; - } else if (tc_ptr < v) { - m_max = m - 1; - } else { - m_min = m + 1; - } - } - return tcg_ctx.tb_ctx.tbs[m_max]; + return g_tree_lookup(tb_ctx.tb_tree, &s); } #if !defined(CONFIG_USER_ONLY) @@ -1769,9 +1723,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) CPUArchState *env = cpu->env_ptr; #endif TranslationBlock *tb; - uint32_t n, cflags; - target_ulong pc, cs_base; - uint32_t flags; + uint32_t n; tb_lock(); tb = tb_find_pc(retaddr); @@ -1809,22 +1761,17 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_abort(cpu, "TB too big during recompile"); } - cflags = n | CF_LAST_IO; - pc = tb->pc; - cs_base = tb->cs_base; - flags = tb->flags; - tb_phys_invalidate(tb, -1); + /* Adjust the execution state of the next TB. */ + cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n; + if (tb->cflags & CF_NOCACHE) { if (tb->orig_tb) { /* Invalidate original TB if this TB was generated in * cpu_exec_nocache() */ tb_phys_invalidate(tb->orig_tb, -1); } - tb_free(tb); + tb_remove(tb); } - /* FIXME: In theory this could raise an exception. In practice - we have already translated the block once so it's probably ok. */ - tb_gen_code(cpu, pc, cs_base, flags, cflags); /* TODO: If env->pc != tb->pc (i.e. 
the faulting instruction was not * the first in the TB) then we end up generating a whole new TB and @@ -1893,73 +1840,79 @@ static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf, g_free(hgram); } +struct tb_tree_stats { + size_t host_size; + size_t target_size; + size_t max_target_size; + size_t direct_jmp_count; + size_t direct_jmp2_count; + size_t cross_page; +}; + +static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) +{ + const TranslationBlock *tb = value; + struct tb_tree_stats *tst = data; + + tst->host_size += tb->tc.size; + tst->target_size += tb->size; + if (tb->size > tst->max_target_size) { + tst->max_target_size = tb->size; + } + if (tb->page_addr[1] != -1) { + tst->cross_page++; + } + if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { + tst->direct_jmp_count++; + if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { + tst->direct_jmp2_count++; + } + } + return false; +} + void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) { - int i, target_code_size, max_target_code_size; - int direct_jmp_count, direct_jmp2_count, cross_page; - TranslationBlock *tb; + struct tb_tree_stats tst = {}; struct qht_stats hst; + size_t nb_tbs; tb_lock(); - target_code_size = 0; - max_target_code_size = 0; - cross_page = 0; - direct_jmp_count = 0; - direct_jmp2_count = 0; - for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) { - tb = tcg_ctx.tb_ctx.tbs[i]; - target_code_size += tb->size; - if (tb->size > max_target_code_size) { - max_target_code_size = tb->size; - } - if (tb->page_addr[1] != -1) { - cross_page++; - } - if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { - direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { - direct_jmp2_count++; - } - } - } + nb_tbs = g_tree_nnodes(tb_ctx.tb_tree); + g_tree_foreach(tb_ctx.tb_tree, tb_tree_stats_iter, &tst); /* XXX: avoid using doubles ? */ cpu_fprintf(f, "Translation buffer state:\n"); - cpu_fprintf(f, "gen code size %td/%zd\n", - tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, - tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer); - cpu_fprintf(f, "TB count %d\n", tcg_ctx.tb_ctx.nb_tbs); - cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", - tcg_ctx.tb_ctx.nb_tbs ? target_code_size / - tcg_ctx.tb_ctx.nb_tbs : 0, - max_target_code_size); - cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", - tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / - tcg_ctx.tb_ctx.nb_tbs : 0, - target_code_size ? (double) (tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / - target_code_size : 0); - cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page, - tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); - cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", - direct_jmp_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0, - direct_jmp2_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); - - qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst); + /* + * Report total code size including the padding and TB structs; + * otherwise users might think "-tb-size" is not honoured. + * For avg host size we use the precise numbers from tb_tree_stats though. + */ + cpu_fprintf(f, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + cpu_fprintf(f, "TB count %zu\n", nb_tbs); + cpu_fprintf(f, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? 
tst.target_size / nb_tbs : 0, + tst.max_target_size); + cpu_fprintf(f, "TB avg host size %zu bytes (expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? (double)tst.host_size / tst.target_size : 0); + cpu_fprintf(f, "cross page TB count %zu (%zu%%)\n", tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + cpu_fprintf(f, "direct jump count %zu (%zu%%) (2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); + + qht_statistics_init(&tb_ctx.htable, &hst); print_qht_statistics(f, cpu_fprintf, hst); qht_statistics_destroy(&hst); cpu_fprintf(f, "\nStatistics:\n"); cpu_fprintf(f, "TB flush count %u\n", - atomic_read(&tcg_ctx.tb_ctx.tb_flush_count)); - cpu_fprintf(f, "TB invalidate count %d\n", - tcg_ctx.tb_ctx.tb_phys_invalidate_count); + atomic_read(&tb_ctx.tb_flush_count)); + cpu_fprintf(f, "TB invalidate count %d\n", tb_ctx.tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %zu\n", tlb_flush_count()); tcg_dump_info(f, cpu_fprintf);
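
Notes on the techniques this patch relies on follow; any name or helper that does not appear in the diff itself is an illustrative assumption, not QEMU API.

With tb->tc.search removed, cpu_restore_state_from_tb() finds the search data at tb->tc.ptr + tb->tc.size, i.e. immediately after the generated host code. That data is what encode_search() emits: for each guest instruction, the deltas of gen_insn_data[] and gen_insn_end_off[] encoded as signed LEB128. A self-contained sketch of the sleb128 round-trip this file uses (int64_t standing in for target_long):

    #include <stdint.h>
    #include <stdio.h>

    /* Encode VAL as a signed LEB128 sequence at P; return the next byte. */
    static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
    {
        int more, byte;

        do {
            byte = val & 0x7f;
            val >>= 7;
            /* Stop once the remaining bits are pure sign extension. */
            more = !((val == 0 && (byte & 0x40) == 0)
                     || (val == -1 && (byte & 0x40) != 0));
            if (more) {
                byte |= 0x80;       /* continuation bit */
            }
            *p++ = byte;
        } while (more);

        return p;
    }

    /* Decode a signed LEB128 sequence at *PP; advance *PP past it. */
    static int64_t decode_sleb128(const uint8_t **pp)
    {
        const uint8_t *p = *pp;
        int64_t val = 0;
        int byte, shift = 0;

        do {
            byte = *p++;
            val |= (int64_t)(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        /* Sign-extend if bit 6 of the final byte is set. */
        if (shift < 64 && (byte & 0x40)) {
            val |= -((int64_t)1 << shift);
        }

        *pp = p;
        return val;
    }

    int main(void)
    {
        uint8_t buf[16];
        const uint8_t *rp = buf;
        uint8_t *end = encode_sleb128(buf, -300);

        /* Prints: encoded 2 byte(s), decoded -300 */
        printf("encoded %td byte(s), decoded %lld\n",
               end - buf, (long long)decode_sleb128(&rp));
        return 0;
    }

Because consecutive instructions differ only slightly in pc and end-offset, most deltas fit in a single byte, which keeps the per-TB search block small.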
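
The profiler counters move out of TCGContext globals into a per-context TCGProfile and are updated with atomic_set() on a plain read-modify-write. Each TCGContext now has exactly one writer thread, so no atomic increment is required; the stores only need to be tear-free so that a reader summing the per-context profiles never sees a half-written value. A sketch of the same single-writer idiom in C11 atomics (QEMU's atomic_set/atomic_read are relaxed accessors; the struct below is a stand-in, not the real TCGProfile):

    #include <stdatomic.h>
    #include <stdint.h>

    struct prof_sketch {                  /* stand-in for TCGProfile */
        _Atomic int64_t restore_time;
        _Atomic int64_t restore_count;
    };

    /* Writer side: only the owning thread updates these counters, so a
     * relaxed load + store suffices; atomicity only rules out torn reads. */
    static void count_restore(struct prof_sketch *prof, int64_t elapsed)
    {
        atomic_store_explicit(&prof->restore_time,
                              atomic_load_explicit(&prof->restore_time,
                                                   memory_order_relaxed)
                              + elapsed,
                              memory_order_relaxed);
        atomic_store_explicit(&prof->restore_count,
                              atomic_load_explicit(&prof->restore_count,
                                                   memory_order_relaxed) + 1,
                              memory_order_relaxed);
    }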
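
tb_find_pc() drops the sorted tbs[] array and its hand-rolled binary search: TBs are now tracked in a GTree keyed by struct tb_tc. Inserted keys carry {ptr, size}; a lookup key sets size to 0, which tb_tc_cmp recognizes as a point query against the half-open interval [ptr, ptr + size). A minimal, compilable illustration of the same keying trick (simplified comparator and names of my own, not the patch's):

    #include <glib.h>
    #include <stdio.h>

    struct tc {                /* mirrors struct tb_tc: host code ptr + size */
        const char *ptr;
        size_t size;
    };

    /* Inserted keys have size != 0; a lookup key has size == 0 and matches
     * the node whose interval [ptr, ptr + size) contains its pointer. */
    static gint tc_cmp(gconstpointer ap, gconstpointer bp)
    {
        const struct tc *a = ap;
        const struct tc *b = bp;

        if (a->size && b->size) {          /* insert/remove: order by start */
            return (a->ptr > b->ptr) - (a->ptr < b->ptr);
        }
        g_assert(a->size == 0);          /* glib passes the lookup key first */
        if (a->ptr >= b->ptr + b->size) {
            return 1;
        }
        return a->ptr < b->ptr ? -1 : 0;
    }

    int main(void)
    {
        static char buf[128];              /* stands in for code_gen_buffer */
        struct tc tb1 = { buf, 64 };
        struct tc tb2 = { buf + 64, 32 };
        struct tc key = { buf + 80, 0 };   /* a "host PC" inside tb2 */
        GTree *tree = g_tree_new(tc_cmp);

        g_tree_insert(tree, &tb1, (gpointer)"tb1");
        g_tree_insert(tree, &tb2, (gpointer)"tb2");
        printf("%s\n", (const char *)g_tree_lookup(tree, &key)); /* tb2 */

        g_tree_destroy(tree);
        return 0;
    }

The sketch asserts that glib passes the lookup key as the first comparator argument; as the patch's comment says, that is observed glib behavior rather than a documented guarantee, which is why tb_tc_cmp also handles the reversed order.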
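
do_tb_flush() empties that tree without reallocating it, using the glib refcount detail the patch's comment points out: g_tree_destroy() removes all nodes and then drops one reference, so holding an extra reference turns destroy into a reset:

    /* Hold an extra reference so destroy only clears the nodes; the
     * tree object itself stays alive, valid and empty afterwards. */
    g_tree_ref(tb_ctx.tb_tree);
    g_tree_destroy(tb_ctx.tb_tree);

This drops every node in one call while avoiding a free-and-rebuild of the tree on each flush.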
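
Finally, the precise-SMC paths and cpu_io_recompile() no longer call tb_gen_code() on the spot to build a one-instruction block; they record the requirement in cpu->cflags_next_tb and exit the loop, deferring generation to the next TB lookup. The low bits of cflags (CF_COUNT_MASK) hold the instruction count, so "1 | curr_cflags()" requests a single-insn TB. Roughly how the execution loop consumes the override (a sketch following cpu-exec.c in this series, not a verbatim quote):

    /* In cpu_exec()'s main loop, just before looking up the next TB. */
    uint32_t cflags = cpu->cflags_next_tb;

    if (cflags == -1) {
        cflags = curr_cflags();     /* no one-shot request pending */
    } else {
        cpu->cflags_next_tb = -1;   /* consume the request */
    }
    tb = tb_find(cpu, last_tb, tb_exit, cflags);

This is also why tb_hash_func() now mixes "cflags & CF_HASH_MASK" into the hash: blocks compiled with different cflags, such as forced single-insn ones, must be distinguishable in the lookup.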