Diffstat (limited to 'accel/tcg')
-rw-r--r--  accel/tcg/cpu-exec.c      |  80
-rw-r--r--  accel/tcg/cputlb.c        | 259
-rw-r--r--  accel/tcg/internal.h      |  10
-rw-r--r--  accel/tcg/plugin-gen.c    |  22
-rw-r--r--  accel/tcg/tb-hash.h       |   1
-rw-r--r--  accel/tcg/tb-jmp-cache.h  |  65
-rw-r--r--  accel/tcg/translate-all.c | 214
-rw-r--r--  accel/tcg/translator.c    |   2
8 files changed, 379 insertions, 274 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 5f43b9769a..f9e5cc9ba0 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -42,6 +42,7 @@ #include "sysemu/replay.h" #include "sysemu/tcg.h" #include "exec/helper-proto.h" +#include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" #include "internal.h" @@ -174,7 +175,7 @@ struct tb_desc { target_ulong pc; target_ulong cs_base; CPUArchState *env; - tb_page_addr_t phys_page1; + tb_page_addr_t page_addr0; uint32_t flags; uint32_t cflags; uint32_t trace_vcpu_dstate; @@ -185,8 +186,8 @@ static bool tb_lookup_cmp(const void *p, const void *d) const TranslationBlock *tb = p; const struct tb_desc *desc = d; - if (tb->pc == desc->pc && - tb->page_addr[0] == desc->phys_page1 && + if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) && + tb->page_addr[0] == desc->page_addr0 && tb->cs_base == desc->cs_base && tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && @@ -195,8 +196,8 @@ static bool tb_lookup_cmp(const void *p, const void *d) if (tb->page_addr[1] == -1) { return true; } else { - tb_page_addr_t phys_page2; - target_ulong virt_page2; + tb_page_addr_t phys_page1; + target_ulong virt_page1; /* * We know that the first page matched, and an otherwise valid TB @@ -207,9 +208,9 @@ static bool tb_lookup_cmp(const void *p, const void *d) * is different for the new TB. Therefore any exception raised * here by the faulting lookup is not premature. */ - virt_page2 = TARGET_PAGE_ALIGN(desc->pc); - phys_page2 = get_page_addr_code(desc->env, virt_page2); - if (tb->page_addr[1] == phys_page2) { + virt_page1 = TARGET_PAGE_ALIGN(desc->pc); + phys_page1 = get_page_addr_code(desc->env, virt_page1); + if (tb->page_addr[1] == phys_page1) { return true; } } @@ -235,8 +236,9 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, if (phys_pc == -1) { return NULL; } - desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); + desc.page_addr0 = phys_pc; + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 
0 : pc), + flags, cflags, *cpu->trace_dstate); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } @@ -246,16 +248,18 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, uint32_t flags, uint32_t cflags) { TranslationBlock *tb; + CPUJumpCache *jc; uint32_t hash; /* we should never be trying to look up an INVALID tb */ tcg_debug_assert(!(cflags & CF_INVALID)); hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); + jc = cpu->tb_jmp_cache; + tb = tb_jmp_cache_get_tb(jc, hash); if (likely(tb && - tb->pc == pc && + tb_jmp_cache_get_pc(jc, hash, tb) == pc && tb->cs_base == cs_base && tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && @@ -266,16 +270,14 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, if (tb == NULL) { return NULL; } - qatomic_set(&cpu->tb_jmp_cache[hash], tb); + tb_jmp_cache_set(jc, hash, tb, pc); return tb; } -static inline void log_cpu_exec(target_ulong pc, CPUState *cpu, - const TranslationBlock *tb) +static void log_cpu_exec(target_ulong pc, CPUState *cpu, + const TranslationBlock *tb) { - if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) - && qemu_log_in_addr_range(pc)) { - + if (qemu_log_in_addr_range(pc)) { qemu_log_mask(CPU_LOG_EXEC, "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx "/%08x/%08x] %s\n", @@ -399,7 +401,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) return tcg_code_gen_epilogue; } - log_cpu_exec(pc, cpu, tb); + if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) { + log_cpu_exec(pc, cpu, tb); + } return tb->tc.ptr; } @@ -422,7 +426,9 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) TranslationBlock *last_tb; const void *tb_ptr = itb->tc.ptr; - log_cpu_exec(itb->pc, cpu, itb); + if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) { + log_cpu_exec(log_pc(cpu, itb), cpu, itb); + } qemu_thread_jit_execute(); ret = tcg_qemu_tb_exec(env, tb_ptr); @@ -446,16 +452,21 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) * of the start of the TB. 
*/ CPUClass *cc = CPU_GET_CLASS(cpu); - qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc, - "Stopped execution of TB chain before %p [" - TARGET_FMT_lx "] %s\n", - last_tb->tc.ptr, last_tb->pc, - lookup_symbol(last_tb->pc)); + if (cc->tcg_ops->synchronize_from_tb) { cc->tcg_ops->synchronize_from_tb(cpu, last_tb); } else { + assert(!TARGET_TB_PCREL); assert(cc->set_pc); - cc->set_pc(cpu, last_tb->pc); + cc->set_pc(cpu, tb_pc(last_tb)); + } + if (qemu_loglevel_mask(CPU_LOG_EXEC)) { + target_ulong pc = log_pc(cpu, last_tb); + if (qemu_log_in_addr_range(pc)) { + qemu_log("Stopped execution of TB chain before %p [" + TARGET_FMT_lx "] %s\n", + last_tb->tc.ptr, pc, lookup_symbol(pc)); + } } } @@ -597,11 +608,8 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, qemu_spin_unlock(&tb_next->jmp_lock); - qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, - "Linking TBs %p [" TARGET_FMT_lx - "] index %d -> %p [" TARGET_FMT_lx "]\n", - tb->tc.ptr, tb->pc, n, - tb_next->tc.ptr, tb_next->pc); + qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n", + tb->tc.ptr, n, tb_next->tc.ptr); return; out_unlock_next: @@ -847,11 +855,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, + target_ulong pc, TranslationBlock **last_tb, int *tb_exit) { int32_t insns_left; - trace_exec_tb(tb, tb->pc); + trace_exec_tb(tb, pc); tb = cpu_tb_exec(cpu, tb, tb_exit); if (*tb_exit != TB_EXIT_REQUESTED) { *last_tb = tb; @@ -987,6 +996,8 @@ int cpu_exec(CPUState *cpu) tb = tb_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { + uint32_t h; + mmap_lock(); tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); mmap_unlock(); @@ -994,7 +1005,8 @@ int cpu_exec(CPUState *cpu) * We add the TB in the virtual pc hash table * for the fast lookup */ - qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); + h = tb_jmp_cache_hash_func(pc); + tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc); } #ifndef CONFIG_USER_ONLY @@ -1013,7 +1025,7 @@ int cpu_exec(CPUState *cpu) tb_add_jump(last_tb, tb_exit, tb); } - cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); + cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit); /* Try to align the host and virtual clocks if the guest is in advance */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 8fad2d9b83..6f1c00682b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -100,21 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) { - unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr); + int i, i0 = tb_jmp_cache_hash_page(page_addr); + CPUJumpCache *jc = cpu->tb_jmp_cache; for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { - qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL); + qatomic_set(&jc->array[i0 + i].tb, NULL); } } -static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) -{ - /* Discard jump cache entries for any tb which might potentially - overlap the flushed page. 
*/ - tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE); - tb_jmp_cache_clear_page(cpu, addr); -} - /** * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary * @desc: The CPUTLBDesc portion of the TLB @@ -200,13 +193,13 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast, } g_free(fast->table); - g_free(desc->iotlb); + g_free(desc->fulltlb); tlb_window_reset(desc, now, 0); /* desc->n_used_entries is cleared by the caller */ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; fast->table = g_try_new(CPUTLBEntry, new_size); - desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); + desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size); /* * If the allocations fail, try smaller sizes. We just freed some @@ -215,7 +208,7 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast, * allocations to fail though, so we progressively reduce the allocation * size, aborting if we cannot even allocate the smallest TLB we support. */ - while (fast->table == NULL || desc->iotlb == NULL) { + while (fast->table == NULL || desc->fulltlb == NULL) { if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { error_report("%s: %s", __func__, strerror(errno)); abort(); @@ -224,9 +217,9 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast, fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; g_free(fast->table); - g_free(desc->iotlb); + g_free(desc->fulltlb); fast->table = g_try_new(CPUTLBEntry, new_size); - desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); + desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size); } } @@ -258,7 +251,7 @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) desc->n_used_entries = 0; fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; fast->table = g_new(CPUTLBEntry, n_entries); - desc->iotlb = g_new(CPUIOTLBEntry, n_entries); + desc->fulltlb = g_new(CPUTLBEntryFull, n_entries); tlb_mmu_flush_locked(desc, fast); } @@ -299,7 +292,7 @@ void tlb_destroy(CPUState *cpu) CPUTLBDescFast *fast = &env_tlb(env)->f[i]; g_free(fast->table); - g_free(desc->iotlb); + g_free(desc->fulltlb); } } @@ -364,7 +357,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) qemu_spin_unlock(&env_tlb(env)->c.lock); - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); if (to_clean == ALL_MMUIDX_BITS) { qatomic_set(&env_tlb(env)->c.full_flush_count, @@ -541,7 +534,12 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, } qemu_spin_unlock(&env_tlb(env)->c.lock); - tb_flush_jmp_cache(cpu, addr); + /* + * Discard jump cache entries for any tb which might potentially + * overlap the flushed page, which includes the previous. + */ + tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE); + tb_jmp_cache_clear_page(cpu, addr); } /** @@ -788,12 +786,18 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, * longer to clear each entry individually than it will to clear it all. */ if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) { - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); return; } - for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) { - tb_flush_jmp_cache(cpu, d.addr + i); + /* + * Discard jump cache entries for any tb which might potentially + * overlap the flushed pages, which includes the previous. 
+ */ + d.addr -= TARGET_PAGE_SIZE; + for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) { + tb_jmp_cache_clear_page(cpu, d.addr); + d.addr += TARGET_PAGE_SIZE; } } @@ -951,7 +955,8 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, can be detected */ void tlb_protect_code(ram_addr_t ram_addr) { - cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, + cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK, + TARGET_PAGE_SIZE, DIRTY_MEMORY_CODE); } @@ -1095,16 +1100,16 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; } -/* Add a new TLB entry. At most one entry for a given virtual address +/* + * Add a new TLB entry. At most one entry for a given virtual address * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the * supplied size is only used by tlb_flush_page. * * Called from TCG-generated code, which is under an RCU read-side * critical section. */ -void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, - hwaddr paddr, MemTxAttrs attrs, int prot, - int mmu_idx, target_ulong size) +void tlb_set_page_full(CPUState *cpu, int mmu_idx, + target_ulong vaddr, CPUTLBEntryFull *full) { CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); @@ -1117,35 +1122,36 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; - int asidx = cpu_asidx_from_attrs(cpu, attrs); - int wp_flags; + int asidx, wp_flags, prot; bool is_ram, is_romd; assert_cpu_is_self(cpu); - if (size <= TARGET_PAGE_SIZE) { + if (full->lg_page_size <= TARGET_PAGE_BITS) { sz = TARGET_PAGE_SIZE; } else { - tlb_add_large_page(env, mmu_idx, vaddr, size); - sz = size; + sz = (hwaddr)1 << full->lg_page_size; + tlb_add_large_page(env, mmu_idx, vaddr, sz); } vaddr_page = vaddr & TARGET_PAGE_MASK; - paddr_page = paddr & TARGET_PAGE_MASK; + paddr_page = full->phys_addr & TARGET_PAGE_MASK; + prot = full->prot; + asidx = cpu_asidx_from_attrs(cpu, full->attrs); section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, - &xlat, &sz, attrs, &prot); + &xlat, &sz, full->attrs, &prot); assert(sz >= TARGET_PAGE_SIZE); tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx " prot=%x idx=%d\n", - vaddr, paddr, prot, mmu_idx); + vaddr, full->phys_addr, prot, mmu_idx); address = vaddr_page; - if (size < TARGET_PAGE_SIZE) { + if (full->lg_page_size < TARGET_PAGE_BITS) { /* Repeat the MMU check and TLB fill on every access. */ address |= TLB_INVALID_MASK; } - if (attrs.byte_swap) { + if (full->attrs.byte_swap) { address |= TLB_BSWAP; } @@ -1219,7 +1225,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, /* Evict the old entry into the victim tlb. */ copy_tlb_helper_locked(tv, te); - desc->viotlb[vidx] = desc->iotlb[index]; + desc->vfulltlb[vidx] = desc->fulltlb[index]; tlb_n_used_entries_dec(env, mmu_idx); } @@ -1236,8 +1242,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, * subtract here is that of the page base, and not the same as the * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 
*/ - desc->iotlb[index].addr = iotlb - vaddr_page; - desc->iotlb[index].attrs = attrs; + desc->fulltlb[index] = *full; + desc->fulltlb[index].xlat_section = iotlb - vaddr_page; + desc->fulltlb[index].phys_addr = paddr_page; + desc->fulltlb[index].prot = prot; /* Now calculate the new entry */ tn.addend = addend - vaddr_page; @@ -1272,9 +1280,21 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, qemu_spin_unlock(&tlb->c.lock); } -/* Add a new TLB entry, but without specifying the memory - * transaction attributes to be used. - */ +void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, + hwaddr paddr, MemTxAttrs attrs, int prot, + int mmu_idx, target_ulong size) +{ + CPUTLBEntryFull full = { + .phys_addr = paddr, + .attrs = attrs, + .prot = prot, + .lg_page_size = ctz64(size) + }; + + assert(is_power_of_2(size)); + tlb_set_page_full(cpu, mmu_idx, vaddr, &full); +} + void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size) @@ -1291,15 +1311,14 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, static void tlb_fill(CPUState *cpu, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { - CPUClass *cc = CPU_GET_CLASS(cpu); bool ok; /* * This is not a probe, so only valid return is success; failure * should result in exception + longjmp to the cpu loop. */ - ok = cc->tcg_ops->tlb_fill(cpu, addr, size, - access_type, mmu_idx, false, retaddr); + ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size, + access_type, mmu_idx, false, retaddr); assert(ok); } @@ -1307,9 +1326,8 @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { - CPUClass *cc = CPU_GET_CLASS(cpu); - - cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr); + cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, + mmu_idx, retaddr); } static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, @@ -1329,7 +1347,7 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, } } -static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, +static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, target_ulong addr, uintptr_t retaddr, MMUAccessType access_type, MemOp op) { @@ -1341,9 +1359,9 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); + section = iotlb_to_section(cpu, full->xlat_section, full->attrs); mr = section->mr; - mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; + mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); @@ -1353,14 +1371,14 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, qemu_mutex_lock_iothread(); locked = true; } - r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); + r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); if (r != MEMTX_OK) { hwaddr physaddr = mr_offset + section->offset_within_address_space - section->offset_within_region; cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, - mmu_idx, iotlbentry->attrs, r, retaddr); + mmu_idx, full->attrs, r, retaddr); } if (locked) { qemu_mutex_unlock_iothread(); @@ -1370,22 +1388,21 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } /* - * Save a 
potentially trashed IOTLB entry for later lookup by plugin. - * This is read by tlb_plugin_lookup if the iotlb entry doesn't match + * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin. + * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match * because of the side effect of io_writex changing memory layout. */ -static void save_iotlb_data(CPUState *cs, hwaddr addr, - MemoryRegionSection *section, hwaddr mr_offset) +static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, + hwaddr mr_offset) { #ifdef CONFIG_PLUGIN SavedIOTLB *saved = &cs->saved_iotlb; - saved->addr = addr; saved->section = section; saved->mr_offset = mr_offset; #endif } -static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, +static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, uint64_t val, target_ulong addr, uintptr_t retaddr, MemOp op) { @@ -1396,9 +1413,9 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); + section = iotlb_to_section(cpu, full->xlat_section, full->attrs); mr = section->mr; - mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; + mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr; if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -1408,20 +1425,20 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * The memory_region_dispatch may trigger a flush/resize * so for plugins we save the iotlb_data just in case. */ - save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset); + save_iotlb_data(cpu, section, mr_offset); if (!qemu_mutex_iothread_locked()) { qemu_mutex_lock_iothread(); locked = true; } - r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); + r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); if (r != MEMTX_OK) { hwaddr physaddr = mr_offset + section->offset_within_address_space - section->offset_within_region; cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), - MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, + MMU_DATA_STORE, mmu_idx, full->attrs, r, retaddr); } if (locked) { @@ -1468,9 +1485,10 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, copy_tlb_helper_locked(vtlb, &tmptlb); qemu_spin_unlock(&env_tlb(env)->c.lock); - CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; - CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; - tmpio = *io; *io = *vio; *vio = tmpio; + CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx]; + CPUTLBEntryFull tmpf; + tmpf = *f1; *f1 = *f2; *f2 = tmpf; return true; } } @@ -1483,9 +1501,9 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, (ADDR) & TARGET_PAGE_MASK) static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, - CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) + CPUTLBEntryFull *full, uintptr_t retaddr) { - ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + ram_addr_t ram_addr = mem_vaddr + full->xlat_section; trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); @@ -1512,7 +1530,8 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, static int probe_access_internal(CPUArchState *env, target_ulong addr, int fault_size, MMUAccessType access_type, int mmu_idx, bool nonfault, - void **phost, uintptr_t retaddr) + void **phost, CPUTLBEntryFull **pfull, 
+ uintptr_t retaddr) { uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); @@ -1535,25 +1554,36 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, } tlb_addr = tlb_read_ofs(entry, elt_ofs); + flags = TLB_FLAGS_MASK; page_addr = addr & TARGET_PAGE_MASK; if (!tlb_hit_page(tlb_addr, page_addr)) { if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { CPUState *cs = env_cpu(env); - CPUClass *cc = CPU_GET_CLASS(cs); - if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type, - mmu_idx, nonfault, retaddr)) { + if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type, + mmu_idx, nonfault, retaddr)) { /* Non-faulting page table read failed. */ *phost = NULL; + *pfull = NULL; return TLB_INVALID_MASK; } /* TLB resize via tlb_fill may have moved the entry. */ + index = tlb_index(env, mmu_idx, addr); entry = tlb_entry(env, mmu_idx, addr); + + /* + * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately, + * to force the next access through tlb_fill. We've just + * called tlb_fill, so we know that this entry *is* valid. + */ + flags &= ~TLB_INVALID_MASK; } tlb_addr = tlb_read_ofs(entry, elt_ofs); } - flags = tlb_addr & TLB_FLAGS_MASK; + flags &= tlb_addr; + + *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index]; /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { @@ -1566,37 +1596,44 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, return flags; } -int probe_access_flags(CPUArchState *env, target_ulong addr, - MMUAccessType access_type, int mmu_idx, - bool nonfault, void **phost, uintptr_t retaddr) +int probe_access_full(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, CPUTLBEntryFull **pfull, + uintptr_t retaddr) { - int flags; - - flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, - nonfault, phost, retaddr); + int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, + nonfault, phost, pfull, retaddr); /* Handle clean RAM pages. */ if (unlikely(flags & TLB_NOTDIRTY)) { - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; - - notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); + notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr); flags &= ~TLB_NOTDIRTY; } return flags; } +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr) +{ + CPUTLBEntryFull *full; + + return probe_access_full(env, addr, access_type, mmu_idx, + nonfault, phost, &full, retaddr); +} + void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { + CPUTLBEntryFull *full; void *host; int flags; g_assert(-(addr | TARGET_PAGE_MASK) >= size); flags = probe_access_internal(env, addr, size, access_type, mmu_idx, - false, &host, retaddr); + false, &host, &full, retaddr); /* Per the interface, size == 0 merely faults the access. */ if (size == 0) { @@ -1604,20 +1641,17 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, } if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; - /* Handle watchpoints. */ if (flags & TLB_WATCHPOINT) { int wp_access = (access_type == MMU_DATA_STORE ? 
BP_MEM_WRITE : BP_MEM_READ); cpu_check_watchpoint(env_cpu(env), addr, size, - iotlbentry->attrs, wp_access, retaddr); + full->attrs, wp_access, retaddr); } /* Handle clean RAM pages. */ if (flags & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); + notdirty_write(env_cpu(env), addr, 1, full, retaddr); } } @@ -1627,11 +1661,12 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, MMUAccessType access_type, int mmu_idx) { + CPUTLBEntryFull *full; void *host; int flags; flags = probe_access_internal(env, addr, 0, access_type, - mmu_idx, true, &host, 0); + mmu_idx, true, &host, &full, 0); /* No combination of flags are expected by the caller. */ return flags ? NULL : host; @@ -1650,10 +1685,11 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, void **hostp) { + CPUTLBEntryFull *full; void *p; (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH, - cpu_mmu_index(env, true), false, &p, 0); + cpu_mmu_index(env, true), false, &p, &full, 0); if (p == NULL) { return -1; } @@ -1674,7 +1710,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, * should have just filled the TLB. The one corner case is io_writex * which can cause TLB flushes and potential resizing of the TLBs * losing the information we need. In those cases we need to recover - * data from a copy of the iotlbentry. As long as this always occurs + * data from a copy of the CPUTLBEntryFull. As long as this always occurs * from the same thread (which a mem callback will be) this is safe. */ @@ -1689,11 +1725,12 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, if (likely(tlb_hit(tlb_addr, addr))) { /* We must have an iotlb entry for MMIO */ if (tlb_addr & TLB_MMIO) { - CPUIOTLBEntry *iotlbentry; - iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + CPUTLBEntryFull *full; + full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; data->is_io = true; - data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); - data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; + data->v.io.section = + iotlb_to_section(cpu, full->xlat_section, full->attrs); + data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr; } else { data->is_io = false; data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend); @@ -1801,7 +1838,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, if (unlikely(tlb_addr & TLB_NOTDIRTY)) { notdirty_write(env_cpu(env), addr, size, - &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); + &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr); } return hostaddr; @@ -1909,7 +1946,7 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; + CPUTLBEntryFull *full; bool need_swap; /* For anything that is unaligned, recurse through full_load. */ @@ -1917,20 +1954,20 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, goto do_unaligned_access; } - iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; /* Handle watchpoints. */ if (unlikely(tlb_addr & TLB_WATCHPOINT)) { /* On watchpoint hit, this will longjmp out. 
*/ cpu_check_watchpoint(env_cpu(env), addr, size, - iotlbentry->attrs, BP_MEM_READ, retaddr); + full->attrs, BP_MEM_READ, retaddr); } need_swap = size > 1 && (tlb_addr & TLB_BSWAP); /* Handle I/O access. */ if (likely(tlb_addr & TLB_MMIO)) { - return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, + return io_readx(env, full, mmu_idx, addr, retaddr, access_type, op ^ (need_swap * MO_BSWAP)); } @@ -2245,12 +2282,12 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, */ if (unlikely(tlb_addr & TLB_WATCHPOINT)) { cpu_check_watchpoint(env_cpu(env), addr, size - size2, - env_tlb(env)->d[mmu_idx].iotlb[index].attrs, + env_tlb(env)->d[mmu_idx].fulltlb[index].attrs, BP_MEM_WRITE, retaddr); } if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { cpu_check_watchpoint(env_cpu(env), page2, size2, - env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, + env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs, BP_MEM_WRITE, retaddr); } @@ -2314,7 +2351,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; + CPUTLBEntryFull *full; bool need_swap; /* For anything that is unaligned, recurse through byte stores. */ @@ -2322,20 +2359,20 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, goto do_unaligned_access; } - iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; /* Handle watchpoints. */ if (unlikely(tlb_addr & TLB_WATCHPOINT)) { /* On watchpoint hit, this will longjmp out. */ cpu_check_watchpoint(env_cpu(env), addr, size, - iotlbentry->attrs, BP_MEM_WRITE, retaddr); + full->attrs, BP_MEM_WRITE, retaddr); } need_swap = size > 1 && (tlb_addr & TLB_BSWAP); /* Handle I/O access. */ if (tlb_addr & TLB_MMIO) { - io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, + io_writex(env, full, mmu_idx, val, addr, retaddr, op ^ (need_swap * MO_BSWAP)); return; } @@ -2347,7 +2384,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); + notdirty_write(env_cpu(env), addr, size, full, retaddr); } haddr = (void *)((uintptr_t)addr + entry->addend); diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 3092bfa964..dc800fd485 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -18,4 +18,14 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); void page_init(void); void tb_htable_init(void); +/* Return the current PC from CPU, which may be cached in TB. 
*/ +static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) +{ +#if TARGET_TB_PCREL + return cpu->cc->get_pc(cpu); +#else + return tb_pc(tb); +#endif +} + #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index 3d0b101e34..80dff68934 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -852,7 +852,8 @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) pr_ops(); } -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only) +bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db, + bool mem_only) { bool ret = false; @@ -870,9 +871,9 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl ret = true; - ptb->vaddr = tb->pc; + ptb->vaddr = db->pc_first; ptb->vaddr2 = -1; - get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); + ptb->haddr1 = db->host_addr[0]; ptb->haddr2 = NULL; ptb->mem_only = mem_only; @@ -898,16 +899,15 @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) * Note that we skip this when haddr1 == NULL, e.g. when we're * fetching instructions from a region not backed by RAM. */ - if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) && - unlikely((db->pc_next & TARGET_PAGE_MASK) != - (db->pc_first & TARGET_PAGE_MASK))) { - get_page_addr_code_hostp(cpu->env_ptr, db->pc_next, - &ptb->haddr2); - ptb->vaddr2 = db->pc_next; - } - if (likely(ptb->vaddr2 == -1)) { + if (ptb->haddr1 == NULL) { + pinsn->haddr = NULL; + } else if (is_same_page(db, db->pc_next)) { pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr; } else { + if (ptb->vaddr2 == -1) { + ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first); + get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2); + } pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2; } } diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h index 0a273d9605..83dc610e4c 100644 --- a/accel/tcg/tb-hash.h +++ b/accel/tcg/tb-hash.h @@ -23,6 +23,7 @@ #include "exec/cpu-defs.h" #include "exec/exec-all.h" #include "qemu/xxhash.h" +#include "tb-jmp-cache.h" #ifdef CONFIG_SOFTMMU diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h new file mode 100644 index 0000000000..ff5ffc8fc2 --- /dev/null +++ b/accel/tcg/tb-jmp-cache.h @@ -0,0 +1,65 @@ +/* + * The per-CPU TranslationBlock jump cache. + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ACCEL_TCG_TB_JMP_CACHE_H +#define ACCEL_TCG_TB_JMP_CACHE_H + +#define TB_JMP_CACHE_BITS 12 +#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) + +/* + * Accessed in parallel; all accesses to 'tb' must be atomic. + * For TARGET_TB_PCREL, accesses to 'pc' must be protected by + * a load_acquire/store_release to 'tb'. + */ +struct CPUJumpCache { + struct { + TranslationBlock *tb; +#if TARGET_TB_PCREL + target_ulong pc; +#endif + } array[TB_JMP_CACHE_SIZE]; +}; + +static inline TranslationBlock * +tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash) +{ +#if TARGET_TB_PCREL + /* Use acquire to ensure current load of pc from jc. */ + return qatomic_load_acquire(&jc->array[hash].tb); +#else + /* Use rcu_read to ensure current load of pc from *tb. 
*/ + return qatomic_rcu_read(&jc->array[hash].tb); +#endif +} + +static inline target_ulong +tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb) +{ +#if TARGET_TB_PCREL + return jc->array[hash].pc; +#else + return tb_pc(tb); +#endif +} + +static inline void +tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash, + TranslationBlock *tb, target_ulong pc) +{ +#if TARGET_TB_PCREL + jc->array[hash].pc = pc; + /* Use store_release on tb to ensure pc is written first. */ + qatomic_store_release(&jc->array[hash].tb, tb); +#else + /* Use the pc value already stored in tb->pc. */ + qatomic_set(&jc->array[hash].tb, tb); +#endif +} + +#endif /* ACCEL_TCG_TB_JMP_CACHE_H */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index d71d04d338..4ed75a13e1 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -58,6 +58,7 @@ #include "sysemu/tcg.h" #include "qapi/error.h" #include "hw/core/tcg-cpu-ops.h" +#include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" #include "internal.h" @@ -102,21 +103,14 @@ #define assert_memory_lock() tcg_debug_assert(have_mmap_lock()) #endif -#define SMC_BITMAP_USE_THRESHOLD 10 - typedef struct PageDesc { /* list of TBs intersecting this ram page */ uintptr_t first_tb; -#ifdef CONFIG_SOFTMMU - /* in order to optimize self modifying code, we count the number - of lookups we do to a given page to use a bitmap */ - unsigned long *code_bitmap; - unsigned int code_write_count; -#else +#ifdef CONFIG_USER_ONLY unsigned long flags; void *target_data; #endif -#ifndef CONFIG_USER_ONLY +#ifdef CONFIG_SOFTMMU QemuSpin lock; #endif } PageDesc; @@ -305,7 +299,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { if (i == 0) { - prev = (j == 0 ? tb->pc : 0); + prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); } else { prev = tcg_ctx->gen_insn_data[i - 1][j]; } @@ -333,7 +327,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc, bool reset_icount) { - target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; + target_ulong data[TARGET_INSN_START_WORDS]; uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; const uint8_t *p = tb->tc.ptr + tb->tc.size; @@ -349,6 +343,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return -1; } + memset(data, 0, sizeof(data)); + if (!TARGET_TB_PCREL) { + data[0] = tb_pc(tb); + } + /* Reconstruct the stored insn data while looking for the point at which the end of the insn exceeds the searched_pc. 
*/ for (i = 0; i < num_insns; ++i) { @@ -472,7 +471,7 @@ void page_init(void) #endif } -static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) +static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc) { PageDesc *pd; void **lp; @@ -540,11 +539,11 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) static inline PageDesc *page_find(tb_page_addr_t index) { - return page_find_alloc(index, 0); + return page_find_alloc(index, false); } static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, - PageDesc **ret_p2, tb_page_addr_t phys2, int alloc); + PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc); /* In user-mode page locks aren't used; mmap_lock is enough */ #ifdef CONFIG_USER_ONLY @@ -658,7 +657,7 @@ static inline void page_unlock(PageDesc *pd) /* lock the page(s) of a TB in the correct acquisition order */ static inline void page_lock_tb(const TranslationBlock *tb) { - page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0); + page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false); } static inline void page_unlock_tb(const TranslationBlock *tb) @@ -847,7 +846,7 @@ void page_collection_unlock(struct page_collection *set) #endif /* !CONFIG_USER_ONLY */ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, - PageDesc **ret_p2, tb_page_addr_t phys2, int alloc) + PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc) { PageDesc *p1, *p2; tb_page_addr_t page1; @@ -891,13 +890,13 @@ static bool tb_cmp(const void *ap, const void *bp) const TranslationBlock *a = ap; const TranslationBlock *b = bp; - return a->pc == b->pc && - a->cs_base == b->cs_base && - a->flags == b->flags && - (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && - a->trace_vcpu_dstate == b->trace_vcpu_dstate && - a->page_addr[0] == b->page_addr[0] && - a->page_addr[1] == b->page_addr[1]; + return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) && + a->cs_base == b->cs_base && + a->flags == b->flags && + (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && + a->trace_vcpu_dstate == b->trace_vcpu_dstate && + a->page_addr[0] == b->page_addr[0] && + a->page_addr[1] == b->page_addr[1]); } void tb_htable_init(void) @@ -907,17 +906,6 @@ void tb_htable_init(void) qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); } -/* call with @p->lock held */ -static inline void invalidate_page_bitmap(PageDesc *p) -{ - assert_page_locked(p); -#ifdef CONFIG_SOFTMMU - g_free(p->code_bitmap); - p->code_bitmap = NULL; - p->code_write_count = 0; -#endif -} - /* Set to NULL all the 'first_tb' fields in all PageDescs. 
*/ static void page_flush_tb_1(int level, void **lp) { @@ -932,7 +920,6 @@ static void page_flush_tb_1(int level, void **lp) for (i = 0; i < V_L2_SIZE; ++i) { page_lock(&pd[i]); pd[i].first_tb = (uintptr_t)NULL; - invalidate_page_bitmap(pd + i); page_unlock(&pd[i]); } } else { @@ -986,7 +973,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) } CPU_FOREACH(cpu) { - cpu_tb_jmp_cache_clear(cpu); + tcg_flush_jmp_cache(cpu); } qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE); @@ -1031,9 +1018,10 @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp) TranslationBlock *tb = p; target_ulong addr = *(target_ulong *)userp; - if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) { + if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) || + addr >= tb_pc(tb) + tb->size)) { printf("ERROR invalidate: address=" TARGET_FMT_lx - " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size); + " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size); } } @@ -1052,11 +1040,11 @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp) TranslationBlock *tb = p; int flags1, flags2; - flags1 = page_get_flags(tb->pc); - flags2 = page_get_flags(tb->pc + tb->size - 1); + flags1 = page_get_flags(tb_pc(tb)); + flags2 = page_get_flags(tb_pc(tb) + tb->size - 1); if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", - (long)tb->pc, tb->size, flags1, flags2); + (long)tb_pc(tb), tb->size, flags1, flags2); } } @@ -1165,6 +1153,28 @@ static inline void tb_jmp_unlink(TranslationBlock *dest) qemu_spin_unlock(&dest->jmp_lock); } +static void tb_jmp_cache_inval_tb(TranslationBlock *tb) +{ + CPUState *cpu; + + if (TARGET_TB_PCREL) { + /* A TB may be at any virtual address */ + CPU_FOREACH(cpu) { + tcg_flush_jmp_cache(cpu); + } + } else { + uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb)); + + CPU_FOREACH(cpu) { + CPUJumpCache *jc = cpu->tb_jmp_cache; + + if (qatomic_read(&jc->array[h].tb) == tb) { + qatomic_set(&jc->array[h].tb, NULL); + } + } + } +} + /* * In user-mode, call with mmap_lock held. * In !user-mode, if @rm_from_page_list is set, call with the TB's pages' @@ -1172,7 +1182,6 @@ static inline void tb_jmp_unlink(TranslationBlock *dest) */ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) { - CPUState *cpu; PageDesc *p; uint32_t h; tb_page_addr_t phys_pc; @@ -1186,9 +1195,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) qemu_spin_unlock(&tb->jmp_lock); /* remove the TB from the hash list */ - phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags, - tb->trace_vcpu_dstate); + phys_pc = tb->page_addr[0]; + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 
0 : tb_pc(tb)), + tb->flags, orig_cflags, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; } @@ -1197,21 +1206,14 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) if (rm_from_page_list) { p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); tb_page_remove(p, tb); - invalidate_page_bitmap(p); if (tb->page_addr[1] != -1) { p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); tb_page_remove(p, tb); - invalidate_page_bitmap(p); } } /* remove the TB from the hash list */ - h = tb_jmp_cache_hash_func(tb->pc); - CPU_FOREACH(cpu) { - if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) { - qatomic_set(&cpu->tb_jmp_cache[h], NULL); - } - } + tb_jmp_cache_inval_tb(tb); /* suppress this TB from the two jump lists */ tb_remove_from_jmp_list(tb, 0); @@ -1246,35 +1248,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) } } -#ifdef CONFIG_SOFTMMU -/* call with @p->lock held */ -static void build_page_bitmap(PageDesc *p) -{ - int n, tb_start, tb_end; - TranslationBlock *tb; - - assert_page_locked(p); - p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE); - - PAGE_FOR_EACH_TB(p, tb, n) { - /* NOTE: this is subtle as a TB may span two physical pages */ - if (n == 0) { - /* NOTE: tb_end may be after the end of the page, but - it is not a problem */ - tb_start = tb->pc & ~TARGET_PAGE_MASK; - tb_end = tb_start + tb->size; - if (tb_end > TARGET_PAGE_SIZE) { - tb_end = TARGET_PAGE_SIZE; - } - } else { - tb_start = 0; - tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); - } - bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start); - } -} -#endif - /* add the tb in the target page and protect it if necessary * * Called with mmap_lock held for user-mode emulation. @@ -1295,7 +1268,6 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, page_already_protected = p->first_tb != (uintptr_t)NULL; #endif p->first_tb = (uintptr_t)tb | n; - invalidate_page_bitmap(p); #if defined(CONFIG_USER_ONLY) /* translator_loop() must have made all TB pages non-writable */ @@ -1341,8 +1313,8 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, * Note that inserting into the hash table first isn't an option, since * we can only insert TBs that are fully initialized. */ - page_lock_pair(&p, phys_pc, &p2, phys_page2, 1); - tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK); + page_lock_pair(&p, phys_pc, &p2, phys_page2, true); + tb_page_add(p, tb, 0, phys_pc); if (p2) { tb_page_add(p2, tb, 1, phys_page2); } else { @@ -1350,17 +1322,15 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags, - tb->trace_vcpu_dstate); + h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 
0 : tb_pc(tb)), + tb->flags, tb->cflags, tb->trace_vcpu_dstate); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); /* remove TB from the page(s) if we couldn't insert it */ if (unlikely(existing_tb)) { tb_page_remove(p, tb); - invalidate_page_bitmap(p); if (p2) { tb_page_remove(p2, tb); - invalidate_page_bitmap(p2); } tb = existing_tb; } @@ -1423,7 +1393,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, gen_code_buf = tcg_ctx->code_gen_ptr; tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); +#if !TARGET_TB_PCREL tb->pc = pc; +#endif tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; @@ -1452,7 +1424,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->cpu = NULL; max_insns = tb->icount; - trace_translate_block(tb, tb->pc, tb->tc.ptr); + trace_translate_block(tb, pc, tb->tc.ptr); /* generate machine code */ tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; @@ -1473,7 +1445,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif - gen_code_size = tcg_gen_code(tcg_ctx, tb); + gen_code_size = tcg_gen_code(tcg_ctx, tb, pc); if (unlikely(gen_code_size < 0)) { error_return: switch (gen_code_size) { @@ -1529,7 +1501,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && - qemu_log_in_addr_range(tb->pc)) { + qemu_log_in_addr_range(pc)) { FILE *logfile = qemu_log_trylock(); if (logfile) { int code_size, data_size; @@ -1697,11 +1669,12 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (n == 0) { /* NOTE: tb_end may be after the end of the page, but it is not a problem */ - tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); + tb_start = tb->page_addr[0]; tb_end = tb_start + tb->size; } else { tb_start = tb->page_addr[1]; - tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); + tb_end = tb_start + ((tb->page_addr[0] + tb->size) + & ~TARGET_PAGE_MASK); } if (!(tb_end <= start || tb_start >= end)) { #ifdef TARGET_HAS_PRECISE_SMC @@ -1731,7 +1704,6 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, #if !defined(CONFIG_USER_ONLY) /* if no code remaining, no need to continue to use slow writes */ if (!p->first_tb) { - invalidate_page_bitmap(p); tlb_unprotect_code(start); } #endif @@ -1827,24 +1799,8 @@ void tb_invalidate_phys_page_fast(struct page_collection *pages, } assert_page_locked(p); - if (!p->code_bitmap && - ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { - build_page_bitmap(p); - } - if (p->code_bitmap) { - unsigned int nr; - unsigned long b; - - nr = start & ~TARGET_PAGE_MASK; - b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1)); - if (b & ((1 << len) - 1)) { - goto do_invalidate; - } - } else { - do_invalidate: - tb_invalidate_phys_page_range__locked(pages, p, start, start + len, - retaddr); - } + tb_invalidate_phys_page_range__locked(pages, p, start, start + len, + retaddr); } #else /* Called with mmap_lock held. 
If pc is not 0 then it indicates the @@ -1985,9 +1941,13 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) */ cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; - qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, - "cpu_io_recompile: rewound execution of TB to " - TARGET_FMT_lx "\n", tb->pc); + if (qemu_loglevel_mask(CPU_LOG_EXEC)) { + target_ulong pc = log_pc(cpu, tb); + if (qemu_log_in_addr_range(pc)) { + qemu_log("cpu_io_recompile: rewound execution of TB to " + TARGET_FMT_lx "\n", pc); + } + } cpu_loop_exit_noexc(cpu); } @@ -2289,7 +2249,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags) for (addr = start, len = end - start; len != 0; len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { - PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1); + PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true); /* If the write protection bit is set, then we invalidate the code inside. */ @@ -2512,6 +2472,26 @@ int page_unprotect(target_ulong address, uintptr_t pc) } #endif /* CONFIG_USER_ONLY */ +/* + * Called by generic code at e.g. cpu reset after cpu creation, + * therefore we must be prepared to allocate the jump cache. + */ +void tcg_flush_jmp_cache(CPUState *cpu) +{ + CPUJumpCache *jc = cpu->tb_jmp_cache; + + if (likely(jc)) { + for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { + qatomic_set(&jc->array[i].tb, NULL); + } + } else { + /* This should happen once during realize, and thus never race. */ + jc = g_new0(CPUJumpCache, 1); + jc = qatomic_xchg(&cpu->tb_jmp_cache, jc); + assert(jc == NULL); + } +} + /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ void tcg_flush_softmmu_tlb(CPUState *cs) { diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index ca8a5f2d83..8e78fd7a9c 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -75,7 +75,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY); + plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY); while (true) { db->num_insns++; |