32 files changed, 837 insertions, 510 deletions
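Editor's note: the series below replaces open-coded TLB indexing of the form (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1) with the new tlb_index()/tlb_entry()/tlb_addr_write() helpers, serializes cross-vCPU TLB updates behind a per-CPU tlb_lock spin lock (taken once per operation to amortize its cost, with owner-thread reads left lock-free), and introduces qemu/atomic128.h with HAVE_ATOMIC128/HAVE_CMPXCHG128 to gate 16-byte atomics. The following is a minimal standalone sketch of the lookup-plus-locked-flush pattern only; ToyTLB, TOY_TLB_BITS and the pthread spin lock are stand-ins invented for illustration and are not part of the QEMU patches.

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define TOY_PAGE_BITS 12
    #define TOY_TLB_BITS  8
    #define TOY_TLB_SIZE  (1 << TOY_TLB_BITS)

    /* Simplified stand-in for CPUTLBEntry. */
    typedef struct {
        uintptr_t addr_read;
        uintptr_t addr_write;
        uintptr_t addend;
    } ToyTLBEntry;

    /* Simplified stand-in for the per-CPU TLB plus the new tlb_lock. */
    typedef struct {
        pthread_spinlock_t lock;          /* plays the role of env->tlb_lock */
        ToyTLBEntry table[TOY_TLB_SIZE];  /* plays the role of env->tlb_table */
    } ToyTLB;

    /* Mirrors tlb_index(): hash the address down to a table slot. */
    static inline uintptr_t toy_tlb_index(uintptr_t addr)
    {
        return (addr >> TOY_PAGE_BITS) & (TOY_TLB_SIZE - 1);
    }

    /* Mirrors tlb_entry(): return the entry for an address. */
    static inline ToyTLBEntry *toy_tlb_entry(ToyTLB *tlb, uintptr_t addr)
    {
        return &tlb->table[toy_tlb_index(addr)];
    }

    /* Mirrors the shape of tlb_flush_nocheck(): writers take the lock
     * before wiping the table, so a flush requested by another thread
     * cannot race with a concurrent update. */
    static void toy_tlb_flush(ToyTLB *tlb)
    {
        pthread_spin_lock(&tlb->lock);
        memset(tlb->table, -1, sizeof(tlb->table));
        pthread_spin_unlock(&tlb->lock);
    }

    int main(void)
    {
        ToyTLB tlb;

        pthread_spin_init(&tlb.lock, PTHREAD_PROCESS_PRIVATE);
        toy_tlb_flush(&tlb);

        ToyTLBEntry *e = toy_tlb_entry(&tlb, 0x12345678);
        printf("slot %zu, addr_write after flush: %#zx\n",
               (size_t)toy_tlb_index(0x12345678), (size_t)e->addr_write);

        pthread_spin_destroy(&tlb.lock);
        return 0;
    }

Compile with -lpthread. In the real patches the lock lives in CPU_COMMON_TLB and is initialized by the new tlb_init(); the locked helpers below (tlb_flush_entry_locked, copy_tlb_helper_locked, etc.) assume their caller already holds env->tlb_lock.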
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index d751bcba48..efde12fdb2 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, cmpv, newv); +#else ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); +#endif ATOMIC_MMU_CLEANUP; return ret; } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return val; } @@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_ST; - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) @@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); +#else ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); +#endif ATOMIC_MMU_CLEANUP; return BSWAP(ret); } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return BSWAP(val); } @@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ATOMIC_TRACE_ST; val = BSWAP(val); - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 6bcb6d99bd..870027d435 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -416,7 +416,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, } #endif /* See if we can patch the calling TB. 
*/ - if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + if (last_tb) { tb_add_jump(last_tb, tb_exit, tb); } return tb; diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f4702ce91f..af57aca5e4 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -32,6 +32,7 @@ #include "exec/log.h" #include "exec/helper-proto.h" #include "qemu/atomic.h" +#include "qemu/atomic128.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -58,9 +59,9 @@ } \ } while (0) -#define assert_cpu_is_self(this_cpu) do { \ +#define assert_cpu_is_self(cpu) do { \ if (DEBUG_TLB_GATE) { \ - g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ + g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ } \ } while (0) @@ -73,6 +74,13 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) +void tlb_init(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + + qemu_spin_init(&env->tlb_lock); +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -125,8 +133,17 @@ static void tlb_flush_nocheck(CPUState *cpu) atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1); tlb_debug("(count: %zu)\n", tlb_flush_count()); + /* + * tlb_table/tlb_v_table updates from any thread must hold tlb_lock. + * However, updates from the owner thread (as is the case here; see the + * above assert_cpu_is_self) do not need atomic_set because all reads + * that do not hold the lock are performed by the same owner thread. + */ + qemu_spin_lock(&env->tlb_lock); memset(env->tlb_table, -1, sizeof(env->tlb_table)); memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + qemu_spin_unlock(&env->tlb_lock); + cpu_tb_jmp_cache_clear(cpu); env->vtlb_index = 0; @@ -178,6 +195,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmask)) { @@ -187,6 +205,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); } } + qemu_spin_unlock(&env->tlb_lock); cpu_tb_jmp_cache_clear(cpu); @@ -239,23 +258,28 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_entry->addr_write, page) || + tlb_hit_page(tlb_addr_write(tlb_entry), page) || tlb_hit_page(tlb_entry->addr_code, page); } -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, + target_ulong page) { if (tlb_hit_page_anyprot(tlb_entry, page)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); } } -static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx, - target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, + target_ulong page) { int k; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page); + tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page); } } @@ -263,7 +287,6 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; 
target_ulong addr = (target_ulong) data.target_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); @@ -281,11 +304,12 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) } addr &= TARGET_PAGE_MASK; - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -314,20 +338,21 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; - int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", - page, addr, mmu_idx_bitmap); + tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", + addr, mmu_idx_bitmap); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmap)) { - tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -450,72 +475,44 @@ void tlb_unprotect_code(ram_addr_t ram_addr) * most usual is detecting writes to code regions which may invalidate * generated code. * - * Because we want other vCPUs to respond to changes straight away we - * update the te->addr_write field atomically. If the TLB entry has - * been changed by the vCPU in the mean time we skip the update. + * Other vCPUs might be reading their TLBs during guest execution, so we update + * te->addr_write with atomic_set. We don't need to worry about this for + * oversized guests as MTTCG is disabled for them. * - * As this function uses atomic accesses we also need to ensure - * updates to tlb_entries follow the same access rules. We don't need - * to worry about this for oversized guests as MTTCG is disabled for - * them. + * Called with tlb_lock held. 
*/ - -static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, - uintptr_t length) +static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, + uintptr_t start, uintptr_t length) { -#if TCG_OVERSIZED_GUEST uintptr_t addr = tlb_entry->addr_write; if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { +#if TCG_OVERSIZED_GUEST tlb_entry->addr_write |= TLB_NOTDIRTY; - } - } #else - /* paired with atomic_mb_set in tlb_set_page_with_attrs */ - uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); - uintptr_t addr = orig_addr; - - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { - addr &= TARGET_PAGE_MASK; - addr += atomic_read(&tlb_entry->addend); - if ((addr - start) < length) { - uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; - atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); + atomic_set(&tlb_entry->addr_write, + tlb_entry->addr_write | TLB_NOTDIRTY); +#endif } } -#endif } -/* For atomic correctness when running MTTCG we need to use the right - * primitives when copying entries */ -static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, - bool atomic_set) +/* + * Called with tlb_lock held. + * Called only from the vCPU context, i.e. the TLB's owner thread. + */ +static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) { -#if TCG_OVERSIZED_GUEST *d = *s; -#else - if (atomic_set) { - d->addr_read = s->addr_read; - d->addr_code = s->addr_code; - atomic_set(&d->addend, atomic_read(&s->addend)); - /* Pairs with flag setting in tlb_reset_dirty_range */ - atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); - } else { - d->addr_read = s->addr_read; - d->addr_write = atomic_read(&s->addr_write); - d->addr_code = s->addr_code; - d->addend = atomic_read(&s->addend); - } -#endif } /* This is a cross vCPU call (i.e. another vCPU resetting the flags of - * the target vCPU). As such care needs to be taken that we don't - * dangerously race with another vCPU update. The only thing actually - * updated is the target TLB entry ->addr_write flags. + * the target vCPU). + * We must take tlb_lock to avoid racing with another vCPU update. The only + * thing actually updated is the target TLB entry ->addr_write flags. 
*/ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) { @@ -524,22 +521,26 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) int mmu_idx; env = cpu->env_ptr; + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { unsigned int i; for (i = 0; i < CPU_TLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, + length); } for (i = 0; i < CPU_VTLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, + length); } } + qemu_spin_unlock(&env->tlb_lock); } -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +/* Called with tlb_lock held */ +static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, + target_ulong vaddr) { if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { tlb_entry->addr_write = vaddr; @@ -551,23 +552,23 @@ static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { CPUArchState *env = cpu->env_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); vaddr &= TARGET_PAGE_MASK; - i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); + tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); } } + qemu_spin_unlock(&env->tlb_lock); } /* Our TLB does not support large pages, so remember the area covered by @@ -654,15 +655,24 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } - /* Make sure there's no cached translation for the new page. */ - tlb_flush_vtlb_page(env, mmu_idx, vaddr_page); - code_address = address; iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, paddr_page, xlat, prot, &address); - index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - te = &env->tlb_table[mmu_idx][index]; + index = tlb_index(env, mmu_idx, vaddr_page); + te = tlb_entry(env, mmu_idx, vaddr_page); + + /* + * Hold the TLB lock for the rest of the function. We could acquire/release + * the lock several times in the function, but it is faster to amortize the + * acquisition cost by acquiring it just once. Note that this leads to + * a longer critical section, but this is not a concern since the TLB lock + * is unlikely to be contended. + */ + qemu_spin_lock(&env->tlb_lock); + + /* Make sure there's no cached translation for the new page. */ + tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); /* * Only evict the old entry to the victim tlb if it's for a @@ -673,7 +683,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; /* Evict the old entry into the victim tlb. 
*/ - copy_tlb_helper(tv, te, true); + copy_tlb_helper_locked(tv, te); env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; } @@ -725,9 +735,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } } - /* Pairs with flag setting in tlb_reset_dirty_range */ - copy_tlb_helper(te, &tn, true); - /* atomic_mb_set(&te->addr_write, write_address); */ + copy_tlb_helper_locked(te, &tn); + qemu_spin_unlock(&env->tlb_lock); } /* Add a new TLB entry, but without specifying the memory @@ -773,16 +782,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_read; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = entry->addr_read; if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; return ldn_p((void *)haddr, size); } @@ -840,16 +849,16 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = tlb_addr_write(entry); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; stn_p((void *)haddr, size, val); return; @@ -891,17 +900,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, size_t elt_ofs, target_ulong page) { size_t vidx; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); + target_ulong cmp; + + /* elt_ofs might correspond to .addr_write, so use atomic_read */ +#if TCG_OVERSIZED_GUEST + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); +#else + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); +#endif if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. 
*/ CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; - copy_tlb_helper(&tmptlb, tlb, false); - copy_tlb_helper(tlb, vtlb, true); - copy_tlb_helper(vtlb, &tmptlb, true); + qemu_spin_lock(&env->tlb_lock); + copy_tlb_helper_locked(&tmptlb, tlb); + copy_tlb_helper_locked(tlb, vtlb); + copy_tlb_helper_locked(vtlb, &tmptlb); + qemu_spin_unlock(&env->tlb_lock); CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; @@ -924,20 +944,19 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, */ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) { - int mmu_idx, index; + uintptr_t mmu_idx = cpu_mmu_index(env, true); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); void *p; - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - mmu_idx = cpu_mmu_index(env, true); - if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) { + if (unlikely(!tlb_hit(entry->addr_code, addr))) { if (!VICTIM_TLB_HIT(addr_code, addr)) { tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); } - assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr)); + assert(tlb_hit(entry->addr_code, addr)); } - if (unlikely(env->tlb_table[mmu_idx][index].addr_code & - (TLB_RECHECK | TLB_MMIO))) { + if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) { /* * Return -1 if we can't translate and execute from an entire * page of RAM here, which will cause us to execute by loading @@ -949,7 +968,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return -1; } - p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); + p = (void *)((uintptr_t)addr + entry->addend); return qemu_ram_addr_from_host_nofail(p); } @@ -962,10 +981,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t retaddr) { - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - if (!tlb_hit(tlb_addr, addr)) { + if (!tlb_hit(tlb_addr_write(entry), addr)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write, addr)) { tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, @@ -981,9 +1000,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, NotDirtyInfo *ndi) { size_t mmu_idx = get_mmuidx(oi); - size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr = tlbe->addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(tlbe); TCGMemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); int s_bits = mop & MO_SIZE; @@ -1014,7 +1033,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1101,7 +1120,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -#ifdef CONFIG_ATOMIC128 +#if HAVE_CMPXCHG128 || 
HAVE_ATOMIC128 #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h index f060a693d4..b0adea045e 100644 --- a/accel/tcg/softmmu_template.h +++ b/accel/tcg/softmmu_template.h @@ -111,9 +111,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -129,7 +130,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. */ @@ -166,7 +167,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); #else @@ -179,9 +180,10 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -197,7 +199,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. 
*/ @@ -234,7 +236,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); return res; } @@ -275,9 +277,10 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -292,7 +295,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -313,16 +316,16 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -340,7 +343,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); #else @@ -352,9 +355,10 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -369,7 +373,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. 
*/ @@ -390,16 +394,16 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -417,7 +421,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } #endif /* DATA_SIZE > 1 */ diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 56dbb56a16..3d25bdcc17 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -51,7 +51,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); if (use_icount && !cpu->can_do_io && (mask & ~old_mask) != 0) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ad5c758246..356dcd0948 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2341,7 +2341,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); } /* diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 26a3ffbba1..cd75829cf2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "translate-all.h" #include "exec/helper-proto.h" +#include "qemu/atomic128.h" #undef EAX #undef ECX @@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* The following is only callable from other helpers, and matches up with the softmmu version. */ -#ifdef CONFIG_ATOMIC128 +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 #undef EXTRA_ARGS #undef ATOMIC_NAME @@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 16 #include "atomic_template.h" -#endif /* CONFIG_ATOMIC128 */ +#endif @@ -5154,6 +5154,21 @@ EOF fi fi +cmpxchg128=no +if test "$int128" = yes -a "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; +} +EOF + if compile_prog "" "" ; then + cmpxchg128=yes + fi +fi + ######################################### # See if 64-bit atomic operations are supported. 
# Note that without __atomic builtins, we can only @@ -6663,6 +6678,10 @@ if test "$atomic128" = "yes" ; then echo "CONFIG_ATOMIC128=y" >> $config_host_mak fi +if test "$cmpxchg128" = "yes" ; then + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak +fi + if test "$atomic64" = "yes" ; then echo "CONFIG_ATOMIC64=y" >> $config_host_mak fi @@ -1425,7 +1425,8 @@ static int tcg_cpu_exec(CPUState *cpu) ret = cpu_exec(cpu); cpu_exec_end(cpu); #ifdef CONFIG_PROFILER - tcg_time += profile_getclock() - ti; + atomic_set(&tcg_ctx->prof.cpu_exec_time, + tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); #endif return ret; } @@ -965,6 +965,7 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) tcg_target_initialized = true; cc->tcg_initialize(); } + tlb_init(cpu); #ifndef CONFIG_USER_ONLY if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index a171ffc1a4..4ff62f32bf 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -24,6 +24,7 @@ #endif #include "qemu/host-utils.h" +#include "qemu/thread.h" #include "qemu/queue.h" #ifdef CONFIG_TCG #include "tcg-target.h" @@ -142,6 +143,8 @@ typedef struct CPUIOTLBEntry { #define CPU_COMMON_TLB \ /* The meaning of the MMU modes is defined in the target code. */ \ + /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \ + QemuSpin tlb_lock; \ CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 41ed0526e2..959068495a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -126,6 +126,29 @@ extern __thread uintptr_t helper_retaddr; /* The memory helpers for tcg-generated code need tcg_target_long etc. */ #include "tcg.h" +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +{ +#if TCG_OVERSIZED_GUEST + return entry->addr_write; +#else + return atomic_read(&entry->addr_write); +#endif +} + +/* Find the TLB index corresponding to the mmu_idx + address pair. */ +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); +} + +/* Find the TLB entry corresponding to the mmu_idx + address pair. 
*/ +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; +} + #ifdef MMU_MODE0_SUFFIX #define CPU_MMU_INDEX 0 #define MEMSUFFIX MMU_MODE0_SUFFIX @@ -416,8 +439,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, #if defined(CONFIG_USER_ONLY) return g2h(addr); #else - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; + CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr); abi_ptr tlb_addr; uintptr_t haddr; @@ -426,7 +448,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tlb_addr = tlbentry->addr_read; break; case 1: - tlb_addr = tlbentry->addr_write; + tlb_addr = tlb_addr_write(tlbentry); break; case 2: tlb_addr = tlbentry->addr_code; @@ -445,7 +467,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, return NULL; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + tlbentry->addend; return (void *)haddr; #endif /* defined(CONFIG_USER_ONLY) */ } diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h index 4db2302962..0f061d47ef 100644 --- a/include/exec/cpu_ldst_template.h +++ b/include/exec/cpu_ldst_template.h @@ -81,7 +81,7 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - int page_index; + CPUTLBEntry *entry; RES_TYPE res; target_ulong addr; int mmu_idx; @@ -94,15 +94,15 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(entry->ADDR_READ != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr); } return res; @@ -120,7 +120,8 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - int res, page_index; + CPUTLBEntry *entry; + int res; target_ulong addr; int mmu_idx; TCGMemOpIdx oi; @@ -132,15 +133,15 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(entry->ADDR_READ != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr); } return res; @@ -162,7 +163,7 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, RES_TYPE v, uintptr_t retaddr) { - int page_index; + CPUTLBEntry *entry; target_ulong addr; int mmu_idx; TCGMemOpIdx oi; @@ -174,15 +175,15 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> 
TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(tlb_addr_write(entry) != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v); } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 5f78125582..815e5b1e83 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -100,6 +100,11 @@ void cpu_address_space_init(CPUState *cpu, int asidx, #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) /* cputlb.c */ /** + * tlb_init - initialize a CPU's TLB + * @cpu: CPU whose TLB should be initialized + */ +void tlb_init(CPUState *cpu); +/** * tlb_flush_page: * @cpu: CPU whose TLB should be flushed * @addr: virtual address of page to be flushed @@ -258,6 +263,9 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t retaddr); #else +static inline void tlb_init(CPUState *cpu) +{ +} static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) { } diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h new file mode 100644 index 0000000000..a6af22ff10 --- /dev/null +++ b/include/qemu/atomic128.h @@ -0,0 +1,153 @@ +/* + * Simple interface for 128-bit atomic operations. + * + * Copyright (C) 2018 Linaro, Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC128_H +#define QEMU_ATOMIC128_H + +/* + * GCC is a house divided about supporting large atomic operations. + * + * For hosts that only have large compare-and-swap, a legalistic reading + * of the C++ standard means that one cannot implement __atomic_read on + * read-only memory, and thus all atomic operations must synchronize + * through libatomic. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 + * + * This interpretation is not especially helpful for QEMU. + * For softmmu, all RAM is always read/write from the hypervisor. + * For user-only, if the guest doesn't implement such an __atomic_read + * then the host need not worry about it either. + * + * Moreover, using libatomic is not an option, because its interface is + * built for std::atomic<T>, and requires that *all* accesses to such an + * object go through the library. In our case we do not have an object + * in the C/C++ sense, but a view of memory as seen by the guest. + * The guest may issue a large atomic operation and then access those + * pieces using word-sized accesses. From the hypervisor, we have no + * way to connect those two actions. + * + * Therefore, special case each platform. 
+ */ + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return atomic_cmpxchg__nocheck(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(CONFIG_CMPXCHG128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return __sync_val_compare_and_swap_16(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(__aarch64__) +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); + uint64_t oldl, oldh; + uint32_t tmp; + + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" + "cmp %[oldl], %[cmpl]\n\t" + "ccmp %[oldh], %[cmph], #0, eq\n\t" + "b.ne 1f\n\t" + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" + "cbnz %w[tmp], 0b\n" + "1:" + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), + [oldl] "=&r"(oldl), [oldh] "=r"(oldh) + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), + [newl] "r"(newl), [newh] "r"(newh) + : "memory", "cc"); + + return int128_make128(oldl, oldh); +} +# define HAVE_CMPXCHG128 1 +#else +/* Fallback definition that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); +# define HAVE_CMPXCHG128 0 +#endif /* Some definition for HAVE_CMPXCHG128 */ + + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_read(Int128 *ptr) +{ + return atomic_read__nocheck(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + atomic_set__nocheck(ptr, val); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) +/* We can do better than cmpxchg for AArch64. */ +static inline Int128 atomic16_read(Int128 *ptr) +{ + uint64_t l, h; + uint32_t tmp; + + /* The load must be paired with the store to guarantee not tearing. */ + asm("0: ldxp %[l], %[h], %[mem]\n\t" + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" + "cbnz %w[tmp], 0b" + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); + + return int128_make128(l, h); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l = int128_getlo(val), h = int128_gethi(val); + uint64_t t1, t2; + + /* Load into temporaries to acquire the exclusive access lock. */ + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" + "stxp %w[t1], %[l], %[h], %[mem]\n\t" + "cbnz %w[t1], 0b" + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) + : [l] "r"(l), [h] "r"(h)); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 +static inline Int128 atomic16_read(Int128 *ptr) +{ + /* Maybe replace 0 with 0, returning the old value. */ + return atomic16_cmpxchg(ptr, 0, 0); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + Int128 old = *ptr, cmp; + do { + cmp = old; + old = atomic16_cmpxchg(ptr, cmp, val); + } while (old != cmp); +} + +# define HAVE_ATOMIC128 1 +#else +/* Fallback definitions that must be optimized away, or error. 
*/ +Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr); +void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val); +# define HAVE_ATOMIC128 0 +#endif /* Some definition for HAVE_ATOMIC128 */ + +#endif /* QEMU_ATOMIC128_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 66f8c241dc..6b92710487 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -146,6 +146,17 @@ # define QEMU_FLATTEN #endif +/* + * If __attribute__((error)) is present, use it to produce an error at + * compile time. Otherwise, one must wait for the linker to diagnose + * the missing symbol. + */ +#if __has_attribute(error) +# define QEMU_ERROR(X) __attribute__((error(X))) +#else +# define QEMU_ERROR(X) +#endif + /* Implement C11 _Generic via GCC builtins. Example: * * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index a005ed2692..dfecd03e28 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -1046,7 +1046,6 @@ static inline int64_t profile_getclock(void) return get_clock(); } -extern int64_t tcg_time; extern int64_t dev_time; #endif @@ -83,6 +83,7 @@ #include "sysemu/cpus.h" #include "sysemu/iothread.h" #include "qemu/cutils.h" +#include "tcg/tcg.h" #if defined(TARGET_S390X) #include "hw/s390x/storage-keys.h" @@ -1966,16 +1967,22 @@ static void hmp_info_numa(Monitor *mon, const QDict *qdict) #ifdef CONFIG_PROFILER -int64_t tcg_time; int64_t dev_time; static void hmp_info_profile(Monitor *mon, const QDict *qdict) { + static int64_t last_cpu_exec_time; + int64_t cpu_exec_time; + int64_t delta; + + cpu_exec_time = tcg_cpu_exec_time(); + delta = cpu_exec_time - last_cpu_exec_time; + monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", - tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND); - tcg_time = 0; + delta, delta / (double)NANOSECONDS_PER_SECOND); + last_cpu_exec_time = cpu_exec_time; dev_time = 0; } #else @@ -265,7 +265,7 @@ static void cpu_common_reset(CPUState *cpu) cpu->mem_io_pc = 0; cpu->mem_io_vaddr = 0; cpu->icount_extra = 0; - cpu->icount_decr.u32 = 0; + atomic_set(&cpu->icount_decr.u32, 0); cpu->can_do_io = 1; cpu->exception_index = -1; cpu->crash_occurred = false; diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index b08078e7fc..a953897fcc 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -201,7 +201,6 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - tlb_flush(cs); env->lock_addr = -1; #if defined(CONFIG_USER_ONLY) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 7f6ad3000b..61799d20e1 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -30,6 +30,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" #include "fpu/softfloat.h" #include <zlib.h> /* For crc32 */ @@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Returns 0 on success; 1 otherwise. 
*/ -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) { - Int128 oldv, cmpv, newv; + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; bool success; - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; - #ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - helper_retaddr = 0; + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } + helper_retaddr = 0; #else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } -#endif + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); } +#endif return !success; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); -} - uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); -} - -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) -{ Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); bool success; + int mem_idx; + TCGMemOpIdx oi; - /* high and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); + assert(HAVE_CMPXCHG128); - 
if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - helper_retaddr = 0; -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - } + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); return !success; } uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; + bool success; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. 
*/ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + + return !success; } uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) + uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); + Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + bool success; + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + newv = int128_make128(new_hi, new_lo); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + + success = int128_eq(oldv, cmpv); + return !success; } /* Writes back the old data into Rs. */ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs] = int128_getlo(oldv); env->xregs[rs + 1] = int128_gethi(oldv); -#endif } void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_hi, uint64_t new_lo) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs + 1] = int128_getlo(oldv); env->xregs[rs] = int128_gethi(oldv); -#endif } /* diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 8a24278d79..bb9c4d8ac7 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -37,6 +37,7 @@ #include "trace-tcg.h" #include "translate-a64.h" +#include 
"qemu/atomic128.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; @@ -2086,26 +2087,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, get_mem_index(s), MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (s->be_data == MO_LE) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!HAVE_CMPXCHG128) { + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; + } else if (s->be_data == MO_LE) { gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); - } - } else { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, @@ -2175,14 +2177,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, } tcg_temp_free_i64(cmp); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - TCGv_i32 tcg_rs = tcg_const_i32(rs); - - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + if (HAVE_CMPXCHG128) { + TCGv_i32 tcg_rs = tcg_const_i32(rs); + if (s->be_data == MO_LE) { + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + } else { + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + } + tcg_temp_free_i32(tcg_rs); } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(tcg_rs); } else { TCGv_i64 d1 = tcg_temp_new_i64(); TCGv_i64 d2 = tcg_temp_new_i64(); diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c index 30c26b9d9c..6cc53bcb40 100644 --- a/target/i386/mem_helper.c +++ b/target/i386/mem_helper.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) @@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, ra); - } else { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else + } else if (HAVE_CMPXCHG128) { int eflags = cpu_cc_compute_all(env, CC_OP); Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); @@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) eflags &= ~CC_Z; } CC_SRC = eflags; -#endif + } else { + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } #endif diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ef64248bc4..7a1481fd0b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 DEF_HELPER_FLAGS_3(lq_le_parallel, 
TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 8f0d86d104..a1485fad9b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "tcg.h" #include "internal.h" +#include "qemu/atomic128.h" //#define DEBUG_OP @@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, return i; } -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); } void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); } @@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; @@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. 
*/ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 881743571b..4e59dd5f42 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -33,6 +33,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" #define CPU_SINGLE_STEP 0x1 @@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + tcg_temp_free(EA); + return; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; - tcg_temp_free(EA); - return; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); tcg_gen_mov_tl(cpu_reserve, EA); @@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); -#ifdef CONFIG_ATOMIC128 - if (ctx->le_mode) { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + if (HAVE_CMPXCHG128) { + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); + if (ctx->le_mode) { + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } else { + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif tcg_temp_free(EA); - tcg_temp_free_i32(oi); } else { TCGLabel *lab_fail = gen_new_label(); TCGLabel *lab_over = gen_new_label(); diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index bacae4f503..490c43e6e6 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" @@ -1379,65 +1380,62 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -static void do_cdsg(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3, bool parallel) +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) { uintptr_t ra = GETPC(); Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 oldv; + uint64_t oldh, oldl; bool fail; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); -#endif - } else { - uint64_t oldh, oldl; + check_alignment(env, addr, 16, ra); - check_alignment(env, addr, 16, ra); + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; } + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + env->cc_op = fail; env->regs[r1] = int128_gethi(oldv); env->regs[r1 + 1] = int128_getlo(oldv); } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - do_cdsg(env, addr, r1, r3, false); -} - void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, uint32_t r1, uint32_t r3) { - do_cdsg(env, addr, r1, r3, true); + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); } static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) uint32_t mem_idx = cpu_mmu_index(env, false); -#endif uintptr_t ra = GETPC(); uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t sc = extract32(env->regs[0], 8, 8); @@ -1465,18 +1463,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, probe_write(env, a2, 0, mem_idx, ra); #endif - /* Note that the compare-and-swap is atomic, and the store is atomic, but 
- the complete operation is not. Therefore we do not need to assert serial - context in order to implement this. That said, restart early if we can't - support either operation that is supposed to be atomic. */ + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ if (parallel) { - int mask = 0; -#if !defined(CONFIG_ATOMIC64) - mask = -8; -#elif !defined(CONFIG_ATOMIC128) - mask = -16; + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; #endif - if (((4 << fc) | (1 << sc)) & mask) { + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } @@ -1546,16 +1546,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 ov; - if (parallel) { -#ifdef CONFIG_ATOMIC128 - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); - cc = !int128_eq(ov, cv); -#else - /* Note that we asserted !parallel above. */ - g_assert_not_reached(); -#endif - } else { + if (!parallel) { uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); @@ -1567,6 +1558,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); } env->regs[r3 + 0] = int128_gethi(ov); @@ -1596,18 +1594,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a2, svh, ra); break; case 4: - if (parallel) { -#ifdef CONFIG_ATOMIC128 + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); Int128 sv = int128_make128(svl, svh); helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); -#else + } else { /* Note that we asserted !parallel above. 
*/ g_assert_not_reached(); -#endif - } else { - cpu_stq_data_ra(env, a2 + 0, svh, ra); - cpu_stq_data_ra(env, a2 + 8, svl, ra); } break; default: @@ -2100,76 +2096,64 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) #endif /* load pair from quadword */ -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) { uintptr_t ra = GETPC(); uint64_t hi, lo; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); - hi = int128_gethi(v); - lo = int128_getlo(v); -#endif - } else { - check_alignment(env, addr, 16, ra); - - hi = cpu_ldq_data_ra(env, addr + 0, ra); - lo = cpu_ldq_data_ra(env, addr + 8, ra); - } + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); env->retxl = lo; return hi; } -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) -{ - return do_lpq(env, addr, false); -} - uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) { - return do_lpq(env, addr, true); -} - -/* store pair to quadword */ -static void do_stpq(CPUS390XState *env, uint64_t addr, - uint64_t low, uint64_t high, bool parallel) -{ uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + assert(HAVE_ATOMIC128); - Int128 v = int128_make128(low, high); - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); -#endif - } else { - check_alignment(env, addr, 16, ra); + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); - cpu_stq_data_ra(env, addr + 0, high, ra); - cpu_stq_data_ra(env, addr + 8, low, ra); - } + env->retxl = lo; + return hi; } +/* store pair to quadword */ void HELPER(stpq)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, false); + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); } void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, true); + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); } /* Execute instruction. This instruction executes an insn modified with diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 18861cd186..b5bd56b7ee 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -44,6 +44,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" /* Information that (most) every instruction needs to manipulate. */ @@ -1128,11 +1129,19 @@ struct DisasInsn { const char *name; + /* Pre-process arguments before HELP_OP. 
*/ void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. */ DisasJumpType (*help_op)(DisasContext *, DisasOps *); uint64_t data; @@ -2032,6 +2041,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) int r3 = get_field(s->fields, r3); int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); + DisasJumpType ret = DISAS_NEXT; TCGv_i64 addr; TCGv_i32 t_r1, t_r3; @@ -2039,17 +2049,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) addr = get_address(s, 0, b2, d2); t_r1 = tcg_const_i32(r1); t_r3 = tcg_const_i32(r3); - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); } else { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; } tcg_temp_free_i64(addr); tcg_temp_free_i32(t_r1); tcg_temp_free_i32(t_r3); set_cc_static(s); - return DISAS_NEXT; + return ret; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -3026,10 +3039,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { gen_helper_lpq_parallel(o->out, cpu_env, o->in2); } else { - gen_helper_lpq(o->out, cpu_env, o->in2); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return_low128(o->out2); return DISAS_NEXT; @@ -4406,10 +4422,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); } else { - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return DISAS_NEXT; } @@ -6125,11 +6144,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (insn->help_op) { ret = insn->help_op(s, &o); } - if (insn->help_wout) { - insn->help_wout(s, &f, &o); - } - if (insn->help_cout) { - insn->help_cout(s, &o); + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &f, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } } /* Free any temporaries created by the helpers. 
*/ diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index 68f978d80b..2b49d1ca40 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -116,8 +116,6 @@ static void uc32_cpu_initfn(Object *obj) env->uncached_asr = ASR_MODE_PRIV; env->regs[31] = 0x03000000; #endif - - tlb_flush(cs); } static const VMStateDescription vmstate_uc32_cpu = { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index daa416a143..7a8015c5a9 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2586,6 +2586,10 @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx) seen this numbered exit before, via tcg_gen_goto_tb. */ tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx)); #endif + /* When not chaining, exit without indicating a link. */ + if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + val = 0; + } } else { /* This is an exit via the exitreq label. */ tcg_debug_assert(idx == TB_EXIT_REQUESTED); @@ -2603,7 +2607,10 @@ void tcg_gen_goto_tb(unsigned idx) tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0); tcg_ctx->goto_tb_issue_mask |= 1 << idx; #endif - tcg_gen_op1i(INDEX_op_goto_tb, idx); + /* When not chaining, we simply fall through to the "fallback" exit. */ + if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + tcg_gen_op1i(INDEX_op_goto_tb, idx); + } } void tcg_gen_lookup_and_goto_ptr(void) @@ -30,6 +30,7 @@ /* Define to jump the ELF file used to communicate with GDB. */ #undef DEBUG_JIT +#include "qemu/error-report.h" #include "qemu/cutils.h" #include "qemu/host-utils.h" #include "qemu/timer.h" @@ -3361,6 +3362,7 @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) const TCGProfile *orig = &s->prof; if (counters) { + PROF_ADD(prof, orig, cpu_exec_time); PROF_ADD(prof, orig, tb_count1); PROF_ADD(prof, orig, tb_count); PROF_ADD(prof, orig, op_count); @@ -3412,11 +3414,32 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) prof.table_op_count[i]); } } + +int64_t tcg_cpu_exec_time(void) +{ + unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); + unsigned int i; + int64_t ret = 0; + + for (i = 0; i < n_ctxs; i++) { + const TCGContext *s = atomic_read(&tcg_ctxs[i]); + const TCGProfile *prof = &s->prof; + + ret += atomic_read(&prof->cpu_exec_time); + } + return ret; +} #else void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) { cpu_fprintf(f, "[TCG profiler not compiled]\n"); } + +int64_t tcg_cpu_exec_time(void) +{ + error_report("%s: TCG profiler not compiled", __func__); + exit(EXIT_FAILURE); +} #endif @@ -3430,7 +3453,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef CONFIG_PROFILER { - int n; + int n = 0; QTAILQ_FOREACH(op, &s->ops, link) { n++; @@ -32,6 +32,7 @@ #include "qemu/queue.h" #include "tcg-mo.h" #include "tcg-target.h" +#include "qemu/int128.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -629,12 +630,13 @@ typedef struct TCGOp { QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); typedef struct TCGProfile { + int64_t cpu_exec_time; int64_t tb_count1; int64_t tb_count; int64_t op_count; /* total insn count */ int op_count_max; /* max insn per TB */ - int64_t temp_count; int temp_count_max; + int64_t temp_count; int64_t del_op_count; int64_t code_in_len; int64_t code_out_len; @@ -1002,6 +1004,7 @@ int tcg_check_temp_count(void); #define tcg_check_temp_count() 0 #endif +int64_t tcg_cpu_exec_time(void); void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf); @@ -1454,11 +1457,14 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef GEN_ATOMIC_HELPER #endif /* 
CONFIG_SOFTMMU */
-#ifdef CONFIG_ATOMIC128
-#include "qemu/int128.h"
-
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
-   However, use the same format as the others, for use by the backends. */
+/*
+ * These aren't really a "proper" helpers because TCG cannot manage Int128.
+ * However, use the same format as the others, for use by the backends.
+ *
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
+ * as defined by <qemu/atomic128.h>.
+ */
 Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
                                      Int128 cmpv, Int128 newv,
                                      TCGMemOpIdx oi, uintptr_t retaddr);
@@ -1475,6 +1481,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
 void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
                               TCGMemOpIdx oi, uintptr_t retaddr);
-#endif /* CONFIG_ATOMIC128 */
-
 #endif /* TCG_H */
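
For context on what the HAVE_CMPXCHG128 and HAVE_ATOMIC128 tests in the hunks above stand for, here is a minimal stand-alone sketch, not part of the patch: the cmpxchg128() and make128() helpers below are invented for illustration, and it assumes a GCC or Clang host with __int128 support. It shows the kind of host-level 16-byte compare-and-swap that <qemu/atomic128.h> advertises when HAVE_CMPXCHG128 is non-zero; on hosts without such an operation, the translators in this series emit gen_helper_exit_atomic() instead and restart the instruction under the exclusive lock.

/*
 * Illustrative only -- not QEMU code.  A 16-byte compare-and-swap using
 * the GCC/Clang __atomic builtins on unsigned __int128.  Depending on the
 * host you may need -mcx16 (x86-64) and/or -latomic for this to be
 * lock-free; qemu/atomic128.h hides this kind of host probing behind the
 * HAVE_CMPXCHG128 / HAVE_ATOMIC128 constants used throughout this series.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

static u128 make128(uint64_t lo, uint64_t hi)
{
    return ((u128)hi << 64) | lo;
}

/* Compare *ptr with cmpv; if equal, store newv.  Return the value found. */
static u128 cmpxchg128(u128 *ptr, u128 cmpv, u128 newv)
{
    __atomic_compare_exchange_n(ptr, &cmpv, newv, false,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return cmpv;        /* on failure the builtin writes back the old value */
}

int main(void)
{
    u128 mem = make128(1, 2);
    u128 oldv = cmpxchg128(&mem, make128(1, 2), make128(3, 4));

    /* The swap succeeded iff the value found matches the expected one. */
    printf("swapped: %s\n", oldv == make128(1, 2) ? "yes" : "no");
    return 0;
}

Testing the constant at translate time, rather than wrapping code in #ifdef CONFIG_ATOMIC128, is what lets the helper declarations stay unconditional (as in the target/ppc/helper.h hunk) while the helper bodies merely assert() that they are only reached when the capability exists.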