78 files changed, 3466 insertions, 849 deletions
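Two changes recur throughout this series: 16-byte atomic accesses are routed through the new qemu/atomic128.h helpers (atomic16_read, atomic16_set, atomic16_cmpxchg, guarded by HAVE_ATOMIC128 / HAVE_CMPXCHG128), and per-vCPU TLB updates are serialized by a new tlb_lock. A minimal sketch of the former, assuming a host where HAVE_ATOMIC128 is 1; the two function names are hypothetical, not part of the patches:

    #include "qemu/osdep.h"
    #include "qemu/int128.h"
    #include "qemu/atomic128.h"

    #if HAVE_ATOMIC128
    /* Atomically publish a 16-byte descriptor and read it back whole;
     * atomic16_set/atomic16_read guarantee the pair cannot tear. */
    static void publish_pair(Int128 *slot, uint64_t lo, uint64_t hi)
    {
        atomic16_set(slot, int128_make128(lo, hi));
    }

    static Int128 read_pair(Int128 *slot)
    {
        return atomic16_read(slot);
    }
    #endif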
@@ -12,6 +12,7 @@ Fabrice Bellard <fabrice@bellard.org> bellard <bellard@c046a42c-6fe2-441c-8c8c-7 James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com> Jocelyn Mayer <l_indien@magic.fr> j_mayer <j_mayer@c046a42c-6fe2-441c-8c8c-71466251a162> Paul Brook <paul@codesourcery.com> pbrook <pbrook@c046a42c-6fe2-441c-8c8c-71466251a162> +Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com> Aleksandar Markovic <amarkovic@wavecomp.com> <aleksandar.markovic@mips.com> Aleksandar Markovic <amarkovic@wavecomp.com> <aleksandar.markovic@imgtec.com> Paul Burton <pburton@wavecomp.com> <paul.burton@mips.com> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index d751bcba48..efde12fdb2 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, cmpv, newv); +#else ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); +#endif ATOMIC_MMU_CLEANUP; return ret; } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return val; } @@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_ST; - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) @@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); +#else ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); +#endif ATOMIC_MMU_CLEANUP; return BSWAP(ret); } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return BSWAP(val); } @@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ATOMIC_TRACE_ST; val = BSWAP(val); - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 6bcb6d99bd..870027d435 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -416,7 +416,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, } #endif /* See if we can patch the calling TB. 
*/ - if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + if (last_tb) { tb_add_jump(last_tb, tb_exit, tb); } return tb; diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f4702ce91f..af57aca5e4 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -32,6 +32,7 @@ #include "exec/log.h" #include "exec/helper-proto.h" #include "qemu/atomic.h" +#include "qemu/atomic128.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -58,9 +59,9 @@ } \ } while (0) -#define assert_cpu_is_self(this_cpu) do { \ +#define assert_cpu_is_self(cpu) do { \ if (DEBUG_TLB_GATE) { \ - g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ + g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ } \ } while (0) @@ -73,6 +74,13 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) +void tlb_init(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + + qemu_spin_init(&env->tlb_lock); +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -125,8 +133,17 @@ static void tlb_flush_nocheck(CPUState *cpu) atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1); tlb_debug("(count: %zu)\n", tlb_flush_count()); + /* + * tlb_table/tlb_v_table updates from any thread must hold tlb_lock. + * However, updates from the owner thread (as is the case here; see the + * above assert_cpu_is_self) do not need atomic_set because all reads + * that do not hold the lock are performed by the same owner thread. + */ + qemu_spin_lock(&env->tlb_lock); memset(env->tlb_table, -1, sizeof(env->tlb_table)); memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + qemu_spin_unlock(&env->tlb_lock); + cpu_tb_jmp_cache_clear(cpu); env->vtlb_index = 0; @@ -178,6 +195,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmask)) { @@ -187,6 +205,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); } } + qemu_spin_unlock(&env->tlb_lock); cpu_tb_jmp_cache_clear(cpu); @@ -239,23 +258,28 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_entry->addr_write, page) || + tlb_hit_page(tlb_addr_write(tlb_entry), page) || tlb_hit_page(tlb_entry->addr_code, page); } -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, + target_ulong page) { if (tlb_hit_page_anyprot(tlb_entry, page)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); } } -static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx, - target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, + target_ulong page) { int k; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page); + tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page); } } @@ -263,7 +287,6 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; 
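    /*
     * The locking rule the hunks above establish, in miniature (a sketch,
     * not part of this patch): bulk updates from the owner thread need
     * only the lock, while word-sized updates from any other thread pair
     * the lock with atomic_set() so the owner's lock-free reads cannot
     * tear:
     *
     *     qemu_spin_lock(&env->tlb_lock);
     *     memset(env->tlb_table, -1, sizeof(env->tlb_table));   // owner
     *     qemu_spin_unlock(&env->tlb_lock);
     *
     *     qemu_spin_lock(&env->tlb_lock);
     *     atomic_set(&te->addr_write, te->addr_write | TLB_NOTDIRTY); // remote
     *     qemu_spin_unlock(&env->tlb_lock);
     */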
target_ulong addr = (target_ulong) data.target_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); @@ -281,11 +304,12 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) } addr &= TARGET_PAGE_MASK; - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -314,20 +338,21 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; - int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", - page, addr, mmu_idx_bitmap); + tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", + addr, mmu_idx_bitmap); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmap)) { - tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -450,72 +475,44 @@ void tlb_unprotect_code(ram_addr_t ram_addr) * most usual is detecting writes to code regions which may invalidate * generated code. * - * Because we want other vCPUs to respond to changes straight away we - * update the te->addr_write field atomically. If the TLB entry has - * been changed by the vCPU in the mean time we skip the update. + * Other vCPUs might be reading their TLBs during guest execution, so we update + * te->addr_write with atomic_set. We don't need to worry about this for + * oversized guests as MTTCG is disabled for them. * - * As this function uses atomic accesses we also need to ensure - * updates to tlb_entries follow the same access rules. We don't need - * to worry about this for oversized guests as MTTCG is disabled for - * them. + * Called with tlb_lock held. 
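 *
 * Readers that may race with this update must go through tlb_addr_write()
 * (added to cpu_ldst.h in this series), which wraps the access in
 * atomic_read() on non-oversized hosts. A hypothetical lock-free check
 * on the owner thread:
 *
 *     static bool entry_needs_notdirty_slowpath(const CPUTLBEntry *e)
 *     {
 *         return tlb_addr_write(e) & TLB_NOTDIRTY;
 *     }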
*/ - -static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, - uintptr_t length) +static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, + uintptr_t start, uintptr_t length) { -#if TCG_OVERSIZED_GUEST uintptr_t addr = tlb_entry->addr_write; if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { +#if TCG_OVERSIZED_GUEST tlb_entry->addr_write |= TLB_NOTDIRTY; - } - } #else - /* paired with atomic_mb_set in tlb_set_page_with_attrs */ - uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); - uintptr_t addr = orig_addr; - - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { - addr &= TARGET_PAGE_MASK; - addr += atomic_read(&tlb_entry->addend); - if ((addr - start) < length) { - uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; - atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); + atomic_set(&tlb_entry->addr_write, + tlb_entry->addr_write | TLB_NOTDIRTY); +#endif } } -#endif } -/* For atomic correctness when running MTTCG we need to use the right - * primitives when copying entries */ -static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, - bool atomic_set) +/* + * Called with tlb_lock held. + * Called only from the vCPU context, i.e. the TLB's owner thread. + */ +static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) { -#if TCG_OVERSIZED_GUEST *d = *s; -#else - if (atomic_set) { - d->addr_read = s->addr_read; - d->addr_code = s->addr_code; - atomic_set(&d->addend, atomic_read(&s->addend)); - /* Pairs with flag setting in tlb_reset_dirty_range */ - atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); - } else { - d->addr_read = s->addr_read; - d->addr_write = atomic_read(&s->addr_write); - d->addr_code = s->addr_code; - d->addend = atomic_read(&s->addend); - } -#endif } /* This is a cross vCPU call (i.e. another vCPU resetting the flags of - * the target vCPU). As such care needs to be taken that we don't - * dangerously race with another vCPU update. The only thing actually - * updated is the target TLB entry ->addr_write flags. + * the target vCPU). + * We must take tlb_lock to avoid racing with another vCPU update. The only + * thing actually updated is the target TLB entry ->addr_write flags. 
*/ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) { @@ -524,22 +521,26 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) int mmu_idx; env = cpu->env_ptr; + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { unsigned int i; for (i = 0; i < CPU_TLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, + length); } for (i = 0; i < CPU_VTLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, + length); } } + qemu_spin_unlock(&env->tlb_lock); } -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +/* Called with tlb_lock held */ +static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, + target_ulong vaddr) { if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { tlb_entry->addr_write = vaddr; @@ -551,23 +552,23 @@ static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { CPUArchState *env = cpu->env_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); vaddr &= TARGET_PAGE_MASK; - i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); + tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); } } + qemu_spin_unlock(&env->tlb_lock); } /* Our TLB does not support large pages, so remember the area covered by @@ -654,15 +655,24 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } - /* Make sure there's no cached translation for the new page. */ - tlb_flush_vtlb_page(env, mmu_idx, vaddr_page); - code_address = address; iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, paddr_page, xlat, prot, &address); - index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - te = &env->tlb_table[mmu_idx][index]; + index = tlb_index(env, mmu_idx, vaddr_page); + te = tlb_entry(env, mmu_idx, vaddr_page); + + /* + * Hold the TLB lock for the rest of the function. We could acquire/release + * the lock several times in the function, but it is faster to amortize the + * acquisition cost by acquiring it just once. Note that this leads to + * a longer critical section, but this is not a concern since the TLB lock + * is unlikely to be contended. + */ + qemu_spin_lock(&env->tlb_lock); + + /* Make sure there's no cached translation for the new page. */ + tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); /* * Only evict the old entry to the victim tlb if it's for a @@ -673,7 +683,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; /* Evict the old entry into the victim tlb. 
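 * (The victim TLB, tlb_v_table, is a small buffer of recently evicted
 * entries; victim_tlb_hit() below probes it and swaps a hit back into
 * the main table, so a refill does not always cost a full tlb_fill.)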
*/ - copy_tlb_helper(tv, te, true); + copy_tlb_helper_locked(tv, te); env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; } @@ -725,9 +735,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } } - /* Pairs with flag setting in tlb_reset_dirty_range */ - copy_tlb_helper(te, &tn, true); - /* atomic_mb_set(&te->addr_write, write_address); */ + copy_tlb_helper_locked(te, &tn); + qemu_spin_unlock(&env->tlb_lock); } /* Add a new TLB entry, but without specifying the memory @@ -773,16 +782,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_read; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = entry->addr_read; if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; return ldn_p((void *)haddr, size); } @@ -840,16 +849,16 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = tlb_addr_write(entry); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; stn_p((void *)haddr, size, val); return; @@ -891,17 +900,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, size_t elt_ofs, target_ulong page) { size_t vidx; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); + target_ulong cmp; + + /* elt_ofs might correspond to .addr_write, so use atomic_read */ +#if TCG_OVERSIZED_GUEST + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); +#else + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); +#endif if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. 
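 * Only the TLB-entry copies below need tlb_lock; the CPUIOTLBEntry swap
 * can stay outside it because the IOTLB is only read and written from
 * the owner vCPU thread, which the assert_cpu_is_self() added above
 * guarantees.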
*/ CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; - copy_tlb_helper(&tmptlb, tlb, false); - copy_tlb_helper(tlb, vtlb, true); - copy_tlb_helper(vtlb, &tmptlb, true); + qemu_spin_lock(&env->tlb_lock); + copy_tlb_helper_locked(&tmptlb, tlb); + copy_tlb_helper_locked(tlb, vtlb); + copy_tlb_helper_locked(vtlb, &tmptlb); + qemu_spin_unlock(&env->tlb_lock); CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; @@ -924,20 +944,19 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, */ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) { - int mmu_idx, index; + uintptr_t mmu_idx = cpu_mmu_index(env, true); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); void *p; - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - mmu_idx = cpu_mmu_index(env, true); - if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) { + if (unlikely(!tlb_hit(entry->addr_code, addr))) { if (!VICTIM_TLB_HIT(addr_code, addr)) { tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); } - assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr)); + assert(tlb_hit(entry->addr_code, addr)); } - if (unlikely(env->tlb_table[mmu_idx][index].addr_code & - (TLB_RECHECK | TLB_MMIO))) { + if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) { /* * Return -1 if we can't translate and execute from an entire * page of RAM here, which will cause us to execute by loading @@ -949,7 +968,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return -1; } - p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); + p = (void *)((uintptr_t)addr + entry->addend); return qemu_ram_addr_from_host_nofail(p); } @@ -962,10 +981,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t retaddr) { - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - if (!tlb_hit(tlb_addr, addr)) { + if (!tlb_hit(tlb_addr_write(entry), addr)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write, addr)) { tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, @@ -981,9 +1000,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, NotDirtyInfo *ndi) { size_t mmu_idx = get_mmuidx(oi); - size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr = tlbe->addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(tlbe); TCGMemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); int s_bits = mop & MO_SIZE; @@ -1014,7 +1033,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1101,7 +1120,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -#ifdef CONFIG_ATOMIC128 +#if HAVE_CMPXCHG128 || 
HAVE_ATOMIC128 #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h index f060a693d4..b0adea045e 100644 --- a/accel/tcg/softmmu_template.h +++ b/accel/tcg/softmmu_template.h @@ -111,9 +111,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -129,7 +130,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. */ @@ -166,7 +167,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); #else @@ -179,9 +180,10 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -197,7 +199,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. 
*/ @@ -234,7 +236,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); return res; } @@ -275,9 +277,10 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -292,7 +295,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -313,16 +316,16 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -340,7 +343,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); #else @@ -352,9 +355,10 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -369,7 +373,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. 
*/ @@ -390,16 +394,16 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -417,7 +421,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } #endif /* DATA_SIZE > 1 */ diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 56dbb56a16..3d25bdcc17 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -51,7 +51,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); if (use_icount && !cpu->can_do_io && (mask & ~old_mask) != 0) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ad5c758246..356dcd0948 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2341,7 +2341,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); } /* diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 26a3ffbba1..cd75829cf2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "translate-all.h" #include "exec/helper-proto.h" +#include "qemu/atomic128.h" #undef EAX #undef ECX @@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* The following is only callable from other helpers, and matches up with the softmmu version. */ -#ifdef CONFIG_ATOMIC128 +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 #undef EXTRA_ARGS #undef ATOMIC_NAME @@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 16 #include "atomic_template.h" -#endif /* CONFIG_ATOMIC128 */ +#endif @@ -5154,6 +5154,21 @@ EOF fi fi +cmpxchg128=no +if test "$int128" = yes -a "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; +} +EOF + if compile_prog "" "" ; then + cmpxchg128=yes + fi +fi + ######################################### # See if 64-bit atomic operations are supported. 
# Note that without __atomic builtins, we can only @@ -6663,6 +6678,10 @@ if test "$atomic128" = "yes" ; then echo "CONFIG_ATOMIC128=y" >> $config_host_mak fi +if test "$cmpxchg128" = "yes" ; then + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak +fi + if test "$atomic64" = "yes" ; then echo "CONFIG_ATOMIC64=y" >> $config_host_mak fi @@ -1425,7 +1425,8 @@ static int tcg_cpu_exec(CPUState *cpu) ret = cpu_exec(cpu); cpu_exec_end(cpu); #ifdef CONFIG_PROFILER - tcg_time += profile_getclock() - ti; + atomic_set(&tcg_ctx->prof.cpu_exec_time, + tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); #endif return ret; } diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 70cfb9ce7d..6302469d0e 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -110,6 +110,40 @@ Note: HeartBeat has not been implemented yet, so you need to trigger failover process by using 'x-colo-lost-heartbeat' command. +== COLO operation status == + ++-----------------+ +| | +| Start COLO | +| | ++--------+--------+ + | + | Main qmp command: + | migrate-set-capabilities with x-colo + | migrate + | + v ++--------+--------+ +| | +| COLO running | +| | ++--------+--------+ + | + | Main qmp command: + | x-colo-lost-heartbeat + | or + | some error happened + v ++--------+--------+ +| | send qmp event: +| COLO failover | COLO_EXIT +| | ++-----------------+ + +COLO use the qmp command to switch and report operation status. +The diagram just shows the main qmp command, you can get the detail +in test procedure. + == Test procedure == 1. Startup qemu Primary: @@ -965,6 +965,7 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) tcg_target_initialized = true; cc->tcg_initialize(); } + tlb_init(cpu); #ifndef CONFIG_USER_ONLY if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 13a9494a8d..5e144cb4e4 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -36,6 +36,7 @@ #include "qemu/range.h" #include "e1000x_common.h" +#include "trace.h" static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -847,6 +848,15 @@ static uint64_t rx_desc_base(E1000State *s) return (bah << 32) + bal; } +static void +e1000_receiver_overrun(E1000State *s, size_t size) +{ + trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]); + e1000x_inc_reg_if_not_full(s->mac_reg, RNBC); + e1000x_inc_reg_if_not_full(s->mac_reg, MPC); + set_ics(s, 0, E1000_ICS_RXO); +} + static ssize_t e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { @@ -916,8 +926,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) desc_offset = 0; total_size = size + e1000x_fcs_len(s->mac_reg); if (!e1000_has_rxbufs(s, total_size)) { - set_ics(s, 0, E1000_ICS_RXO); - return -1; + e1000_receiver_overrun(s, total_size); + return -1; } do { desc_size = total_size - desc_offset; @@ -969,7 +979,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) { DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n", rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]); - set_ics(s, 0, E1000_ICS_RXO); + e1000_receiver_overrun(s, total_size); return -1; } } while (desc_offset < total_size); diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c index 07d79e317f..869518ee06 100644 --- a/hw/net/ne2000.c +++ b/hw/net/ne2000.c @@ -174,7 +174,7 @@ static int ne2000_buffer_full(NE2000State *s) ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_) { NE2000State *s = qemu_get_nic_opaque(nc); - int size = size_; + size_t 
size = size_; uint8_t *p; unsigned int total_len, next, avail, len, index, mcast_idx; uint8_t buf1[60]; @@ -182,7 +182,7 @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_) { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; #if defined(DEBUG_NE2000) - printf("NE2000: received len=%d\n", size); + printf("NE2000: received len=%zu\n", size); #endif if (s->cmd & E8390_STOP || ne2000_buffer_full(s)) diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c index 0c44554168..d9ba04bdfc 100644 --- a/hw/net/pcnet.c +++ b/hw/net/pcnet.c @@ -988,14 +988,14 @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_) uint8_t buf1[60]; int remaining; int crc_err = 0; - int size = size_; + size_t size = size_; if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size || (CSR_LOOP(s) && !s->looptest)) { return -1; } #ifdef PCNET_DEBUG - printf("pcnet_receive size=%d\n", size); + printf("pcnet_receive size=%zu\n", size); #endif /* if too small buffer, then expand it */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 46daa16202..2342a095e3 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -817,7 +817,7 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t RTL8139State *s = qemu_get_nic_opaque(nc); PCIDevice *d = PCI_DEVICE(s); /* size is the length of the buffer passed to the driver */ - int size = size_; + size_t size = size_; const uint8_t *dot1q_buf = NULL; uint32_t packet_header = 0; @@ -826,7 +826,7 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t static const uint8_t broadcast_macaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - DPRINTF(">>> received len=%d\n", size); + DPRINTF(">>> received len=%zu\n", size); /* test if board clock is stopped */ if (!s->clock_enabled) @@ -1035,7 +1035,7 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t if (size+4 > rx_space) { - DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n", + DPRINTF("C+ Rx mode : descriptor %d size %d received %zu + 4\n", descriptor, rx_space, size); s->IntrStatus |= RxOverflow; @@ -1148,7 +1148,7 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t if (avail != 0 && RX_ALIGN(size + 8) >= avail) { DPRINTF("rx overflow: rx buffer length %d head 0x%04x " - "read 0x%04x === available 0x%04x need 0x%04x\n", + "read 0x%04x === available 0x%04x need 0x%04zx\n", s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8); s->IntrStatus |= RxOverflow; diff --git a/hw/net/trace-events b/hw/net/trace-events index c1dea4b156..9d49f62fa1 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -98,6 +98,9 @@ net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash" net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X" net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes" +# hw/net/e1000.c +e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: dropped packet of %zu bytes, RDH=%u, RDT=%u" + # hw/net/e1000x_common.c e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d" e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X" diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4e61944f14..4136d239dd 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1611,6 +1611,8 @@ void 
virtio_del_queue(VirtIODevice *vdev, int n) vdev->vq[n].vring.num = 0; vdev->vq[n].vring.num_default = 0; + vdev->vq[n].handle_output = NULL; + vdev->vq[n].handle_aio_output = NULL; } static void virtio_set_isr(VirtIODevice *vdev, int value) diff --git a/include/elf.h b/include/elf.h index 312f68af81..5f45f9b997 100644 --- a/include/elf.h +++ b/include/elf.h @@ -28,8 +28,11 @@ typedef int64_t Elf64_Sxword; #define PT_PHDR 6 #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff -#define PT_MIPS_REGINFO 0x70000000 -#define PT_MIPS_OPTIONS 0x70000001 + +#define PT_MIPS_REGINFO 0x70000000 +#define PT_MIPS_RTPROC 0x70000001 +#define PT_MIPS_OPTIONS 0x70000002 +#define PT_MIPS_ABIFLAGS 0x70000003 /* Flags in the e_flags field of the header */ /* MIPS architecture level. */ @@ -76,14 +79,38 @@ typedef int64_t Elf64_Sxword; #define EF_MIPS_MACH_OCTEON2 0x008d0000 /* Cavium Networks Octeon2 */ #define EF_MIPS_MACH_OCTEON3 0x008e0000 /* Cavium Networks Octeon3 */ #define EF_MIPS_MACH_5400 0x00910000 /* NEC VR5400 */ -#define EF_MIPS_MACH_5900 0x00920000 /* MIPS R5900 */ +#define EF_MIPS_MACH_5900 0x00920000 /* Toshiba/Sony R5900 */ #define EF_MIPS_MACH_5500 0x00980000 /* NEC VR5500 */ -#define EF_MIPS_MACH_9000 0x00990000 /* PMC-Sierra's RM9000 */ +#define EF_MIPS_MACH_9000 0x00990000 /* PMC-Sierra RM9000 */ #define EF_MIPS_MACH_LS2E 0x00a00000 /* ST Microelectronics Loongson 2E */ #define EF_MIPS_MACH_LS2F 0x00a10000 /* ST Microelectronics Loongson 2F */ #define EF_MIPS_MACH_LS3A 0x00a20000 /* ST Microelectronics Loongson 3A */ #define EF_MIPS_MACH 0x00ff0000 /* EF_MIPS_MACH_xxx selection mask */ +#define MIPS_ABI_FP_ANY 0x0 /* FP ABI doesn't matter */ +#define MIPS_ABI_FP_DOUBLE 0x1 /* -mdouble-float */ +#define MIPS_ABI_FP_SINGLE 0x2 /* -msingle-float */ +#define MIPS_ABI_FP_SOFT 0x3 /* -msoft-float */ +#define MIPS_ABI_FP_OLD_64 0x4 /* -mips32r2 -mfp64 */ +#define MIPS_ABI_FP_XX 0x5 /* -mfpxx */ +#define MIPS_ABI_FP_64 0x6 /* -mips32r2 -mfp64 */ +#define MIPS_ABI_FP_64A 0x7 /* -mips32r2 -mfp64 -mno-odd-spreg */ + +typedef struct mips_elf_abiflags_v0 { + uint16_t version; /* Version of flags structure */ + uint8_t isa_level; /* The level of the ISA: 1-5, 32, 64 */ + uint8_t isa_rev; /* The revision of ISA: */ + /* - 0 for MIPS V and below, */ + /* - 1-n otherwise. */ + uint8_t gpr_size; /* The size of general purpose registers */ + uint8_t cpr1_size; /* The size of co-processor 1 registers */ + uint8_t cpr2_size; /* The size of co-processor 2 registers */ + uint8_t fp_abi; /* The floating-point ABI */ + uint32_t isa_ext; /* Mask of processor-specific extensions */ + uint32_t ases; /* Mask of ASEs used */ + uint32_t flags1; /* Mask of general flags */ + uint32_t flags2; +} Mips_elf_abiflags_v0; /* These constants define the different elf file types */ #define ET_NONE 0 diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index a171ffc1a4..4ff62f32bf 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -24,6 +24,7 @@ #endif #include "qemu/host-utils.h" +#include "qemu/thread.h" #include "qemu/queue.h" #ifdef CONFIG_TCG #include "tcg-target.h" @@ -142,6 +143,8 @@ typedef struct CPUIOTLBEntry { #define CPU_COMMON_TLB \ /* The meaning of the MMU modes is defined in the target code. 
*/ \ + /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \ + QemuSpin tlb_lock; \ CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 41ed0526e2..959068495a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -126,6 +126,29 @@ extern __thread uintptr_t helper_retaddr; /* The memory helpers for tcg-generated code need tcg_target_long etc. */ #include "tcg.h" +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +{ +#if TCG_OVERSIZED_GUEST + return entry->addr_write; +#else + return atomic_read(&entry->addr_write); +#endif +} + +/* Find the TLB index corresponding to the mmu_idx + address pair. */ +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); +} + +/* Find the TLB entry corresponding to the mmu_idx + address pair. */ +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, + target_ulong addr) +{ + return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; +} + #ifdef MMU_MODE0_SUFFIX #define CPU_MMU_INDEX 0 #define MEMSUFFIX MMU_MODE0_SUFFIX @@ -416,8 +439,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, #if defined(CONFIG_USER_ONLY) return g2h(addr); #else - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; + CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr); abi_ptr tlb_addr; uintptr_t haddr; @@ -426,7 +448,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, tlb_addr = tlbentry->addr_read; break; case 1: - tlb_addr = tlbentry->addr_write; + tlb_addr = tlb_addr_write(tlbentry); break; case 2: tlb_addr = tlbentry->addr_code; @@ -445,7 +467,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, return NULL; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + tlbentry->addend; return (void *)haddr; #endif /* defined(CONFIG_USER_ONLY) */ } diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h index 4db2302962..0f061d47ef 100644 --- a/include/exec/cpu_ldst_template.h +++ b/include/exec/cpu_ldst_template.h @@ -81,7 +81,7 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - int page_index; + CPUTLBEntry *entry; RES_TYPE res; target_ulong addr; int mmu_idx; @@ -94,15 +94,15 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(entry->ADDR_READ != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr); } return res; @@ -120,7 +120,8 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - int res, page_index; + CPUTLBEntry *entry; + int res; target_ulong addr; int mmu_idx; TCGMemOpIdx oi; @@ -132,15 +133,15 @@ 
glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(entry->ADDR_READ != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr); } return res; @@ -162,7 +163,7 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, target_ulong ptr, RES_TYPE v, uintptr_t retaddr) { - int page_index; + CPUTLBEntry *entry; target_ulong addr; int mmu_idx; TCGMemOpIdx oi; @@ -174,15 +175,15 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #endif addr = ptr; - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; - if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != + entry = tlb_entry(env, mmu_idx, addr); + if (unlikely(tlb_addr_write(entry) != (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { oi = make_memop_idx(SHIFT, mmu_idx); glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, retaddr); } else { - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + uintptr_t hostaddr = addr + entry->addend; glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v); } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 5f78125582..815e5b1e83 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -100,6 +100,11 @@ void cpu_address_space_init(CPUState *cpu, int asidx, #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) /* cputlb.c */ /** + * tlb_init - initialize a CPU's TLB + * @cpu: CPU whose TLB should be initialized + */ +void tlb_init(CPUState *cpu); +/** * tlb_flush_page: * @cpu: CPU whose TLB should be flushed * @addr: virtual address of page to be flushed @@ -258,6 +263,9 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t retaddr); #else +static inline void tlb_init(CPUState *cpu) +{ +} static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) { } diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3abb639056..9ecd911c3e 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -27,6 +27,7 @@ struct RAMBlock { struct rcu_head rcu; struct MemoryRegion *mr; uint8_t *host; + uint8_t *colo_cache; /* For colo, VM's ram cache */ ram_addr_t offset; ram_addr_t used_length; ram_addr_t max_length; diff --git a/include/migration/colo.h b/include/migration/colo.h index 2fe48ad353..99ce17aca7 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -16,14 +16,21 @@ #include "qemu-common.h" #include "qapi/qapi-types-migration.h" +enum colo_event { + COLO_EVENT_NONE, + COLO_EVENT_CHECKPOINT, + COLO_EVENT_FAILOVER, +}; + void colo_info_init(void); void migrate_start_colo_process(MigrationState *s); bool migration_in_colo_state(void); /* loadvm */ -bool migration_incoming_enable_colo(void); -void migration_incoming_exit_colo(void); +void migration_incoming_enable_colo(void); +void migration_incoming_disable_colo(void); +bool migration_incoming_colo_enabled(void); void *colo_process_incoming_thread(void 
*opaque); bool migration_incoming_in_colo_state(void); diff --git a/include/net/filter.h b/include/net/filter.h index 435acd6f82..49da666ac0 100644 --- a/include/net/filter.h +++ b/include/net/filter.h @@ -38,6 +38,8 @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc, typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp); +typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp); + typedef struct NetFilterClass { ObjectClass parent_class; @@ -45,6 +47,7 @@ typedef struct NetFilterClass { FilterSetup *setup; FilterCleanup *cleanup; FilterStatusChanged *status_changed; + FilterHandleEvent *handle_event; /* mandatory */ FilterReceiveIOV *receive_iov; } NetFilterClass; @@ -77,4 +80,6 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, int iovcnt, void *opaque); +void colo_notify_filters_event(int event, Error **errp); + #endif /* QEMU_NET_FILTER_H */ diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h new file mode 100644 index 0000000000..a6af22ff10 --- /dev/null +++ b/include/qemu/atomic128.h @@ -0,0 +1,153 @@ +/* + * Simple interface for 128-bit atomic operations. + * + * Copyright (C) 2018 Linaro, Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC128_H +#define QEMU_ATOMIC128_H + +/* + * GCC is a house divided about supporting large atomic operations. + * + * For hosts that only have large compare-and-swap, a legalistic reading + * of the C++ standard means that one cannot implement __atomic_read on + * read-only memory, and thus all atomic operations must synchronize + * through libatomic. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 + * + * This interpretation is not especially helpful for QEMU. + * For softmmu, all RAM is always read/write from the hypervisor. + * For user-only, if the guest doesn't implement such an __atomic_read + * then the host need not worry about it either. + * + * Moreover, using libatomic is not an option, because its interface is + * built for std::atomic<T>, and requires that *all* accesses to such an + * object go through the library. In our case we do not have an object + * in the C/C++ sense, but a view of memory as seen by the guest. + * The guest may issue a large atomic operation and then access those + * pieces using word-sized accesses. From the hypervisor, we have no + * way to connect those two actions. + * + * Therefore, special case each platform. + */ + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return atomic_cmpxchg__nocheck(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(CONFIG_CMPXCHG128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return __sync_val_compare_and_swap_16(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(__aarch64__) +/* Through gcc 8, aarch64 has no support for 128-bit at all. 
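 * The ldaxp/stlxp asm below fills that gap. Note that HAVE_CMPXCHG128 is
 * always defined, to 0 or 1, so callers test it with a plain if and rely
 * on dead-code elimination; the QEMU_ERROR stub then turns any call that
 * survives optimization into a build failure. A hypothetical caller:
 *
 *     if (HAVE_CMPXCHG128) {
 *         oldv = atomic16_cmpxchg(haddr, cmpv, newv);
 *     } else {
 *         cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
 *     }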
*/ +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); + uint64_t oldl, oldh; + uint32_t tmp; + + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" + "cmp %[oldl], %[cmpl]\n\t" + "ccmp %[oldh], %[cmph], #0, eq\n\t" + "b.ne 1f\n\t" + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" + "cbnz %w[tmp], 0b\n" + "1:" + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), + [oldl] "=&r"(oldl), [oldh] "=r"(oldh) + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), + [newl] "r"(newl), [newh] "r"(newh) + : "memory", "cc"); + + return int128_make128(oldl, oldh); +} +# define HAVE_CMPXCHG128 1 +#else +/* Fallback definition that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); +# define HAVE_CMPXCHG128 0 +#endif /* Some definition for HAVE_CMPXCHG128 */ + + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_read(Int128 *ptr) +{ + return atomic_read__nocheck(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + atomic_set__nocheck(ptr, val); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) +/* We can do better than cmpxchg for AArch64. */ +static inline Int128 atomic16_read(Int128 *ptr) +{ + uint64_t l, h; + uint32_t tmp; + + /* The load must be paired with the store to guarantee not tearing. */ + asm("0: ldxp %[l], %[h], %[mem]\n\t" + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" + "cbnz %w[tmp], 0b" + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); + + return int128_make128(l, h); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l = int128_getlo(val), h = int128_gethi(val); + uint64_t t1, t2; + + /* Load into temporaries to acquire the exclusive access lock. */ + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" + "stxp %w[t1], %[l], %[h], %[mem]\n\t" + "cbnz %w[t1], 0b" + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) + : [l] "r"(l), [h] "r"(h)); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 +static inline Int128 atomic16_read(Int128 *ptr) +{ + /* Maybe replace 0 with 0, returning the old value. */ + return atomic16_cmpxchg(ptr, 0, 0); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + Int128 old = *ptr, cmp; + do { + cmp = old; + old = atomic16_cmpxchg(ptr, cmp, val); + } while (old != cmp); +} + +# define HAVE_ATOMIC128 1 +#else +/* Fallback definitions that must be optimized away, or error. */ +Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr); +void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val); +# define HAVE_ATOMIC128 0 +#endif /* Some definition for HAVE_ATOMIC128 */ + +#endif /* QEMU_ATOMIC128_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 66f8c241dc..6b92710487 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -146,6 +146,17 @@ # define QEMU_FLATTEN #endif +/* + * If __attribute__((error)) is present, use it to produce an error at + * compile time. Otherwise, one must wait for the linker to diagnose + * the missing symbol. + */ +#if __has_attribute(error) +# define QEMU_ERROR(X) __attribute__((error(X))) +#else +# define QEMU_ERROR(X) +#endif + /* Implement C11 _Generic via GCC builtins. 
Example: * * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9f37c92bd1..a86330c987 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -1044,7 +1044,6 @@ static inline int64_t profile_getclock(void) return get_clock(); } -extern int64_t tcg_time; extern int64_t dev_time; #endif diff --git a/linux-user/mips/target_syscall.h b/linux-user/mips/target_syscall.h index 2fca1c6bf9..33177af113 100644 --- a/linux-user/mips/target_syscall.h +++ b/linux-user/mips/target_syscall.h @@ -244,4 +244,8 @@ static inline abi_ulong target_shmlba(CPUMIPSState *env) return 0x40000; } +/* MIPS-specific prctl() options */ +#define TARGET_PR_SET_FP_MODE 45 +#define TARGET_PR_GET_FP_MODE 46 + #endif /* MIPS_TARGET_SYSCALL_H */ diff --git a/linux-user/mips64/target_syscall.h b/linux-user/mips64/target_syscall.h index 078437d765..c1160e69f8 100644 --- a/linux-user/mips64/target_syscall.h +++ b/linux-user/mips64/target_syscall.h @@ -241,4 +241,8 @@ static inline abi_ulong target_shmlba(CPUMIPSState *env) return 0x40000; } +/* MIPS-specific prctl() options */ +#define TARGET_PR_SET_FP_MODE 45 +#define TARGET_PR_GET_FP_MODE 46 + #endif /* MIPS64_TARGET_SYSCALL_H */ diff --git a/linux-user/qemu.h b/linux-user/qemu.h index b4959e41c6..1beb6a2cfc 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -461,27 +461,59 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size) These are usually used to access struct data members once the struct has been locked - usually with lock_user_struct. */ -/* Tricky points: - - Use __builtin_choose_expr to avoid type promotion from ?:, - - Invalid sizes result in a compile time error stemming from - the fact that abort has no parameters. - - It's easier to use the endian-specific unaligned load/store - functions than host-endian unaligned load/store plus tswapN. */ - -#define __put_user_e(x, hptr, e) \ - (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort)))) \ - ((hptr), (x)), (void)0) - -#define __get_user_e(x, hptr, e) \ - ((x) = (typeof(*hptr))( \ - __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p, \ - __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort)))) \ - (hptr)), (void)0) +/* + * Tricky points: + * - Use __builtin_choose_expr to avoid type promotion from ?:, + * - Invalid sizes result in a compile time error stemming from + * the fact that abort has no parameters. + * - It's easier to use the endian-specific unaligned load/store + * functions than host-endian unaligned load/store plus tswapN. + * - The pragmas are necessary only to silence a clang false-positive + * warning: see https://bugs.llvm.org/show_bug.cgi?id=39113 . + * - We have to disable -Wpragmas warnings to avoid a complaint about + * an unknown warning type from older compilers that don't know about + * -Waddress-of-packed-member. 
+ * - gcc has bugs in its _Pragma() support in some versions, eg + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 -- so we only + * include the warning-suppression pragmas for clang + */ +#ifdef __clang__ +#define PRAGMA_DISABLE_PACKED_WARNING \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wpragmas\""); \ + _Pragma("GCC diagnostic ignored \"-Waddress-of-packed-member\"") + +#define PRAGMA_REENABLE_PACKED_WARNING \ + _Pragma("GCC diagnostic pop") + +#else +#define PRAGMA_DISABLE_PACKED_WARNING +#define PRAGMA_REENABLE_PACKED_WARNING +#endif + +#define __put_user_e(x, hptr, e) \ + do { \ + PRAGMA_DISABLE_PACKED_WARNING; \ + (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort)))) \ + ((hptr), (x)), (void)0); \ + PRAGMA_REENABLE_PACKED_WARNING; \ + } while (0) + +#define __get_user_e(x, hptr, e) \ + do { \ + PRAGMA_DISABLE_PACKED_WARNING; \ + ((x) = (typeof(*hptr))( \ + __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p, \ + __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort)))) \ + (hptr)), (void)0); \ + PRAGMA_REENABLE_PACKED_WARNING; \ + } while (0) + #ifdef TARGET_WORDS_BIGENDIAN # define __put_user(x, hptr) __put_user_e(x, hptr, be) diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c index b4c60aa446..e44e99993c 100644 --- a/linux-user/sparc/signal.c +++ b/linux-user/sparc/signal.c @@ -258,10 +258,6 @@ void setup_frame(int sig, struct target_sigaction *ka, __put_user(val32, &sf->insns[1]); if (err) goto sigsegv; - - /* Flush instruction space. */ - // flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0])); - // tb_flush(env); } unlock_user(sf, sf_addr, sizeof(struct target_signal_frame)); return; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index ae3c0dfef7..d2cc971143 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9347,6 +9347,14 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, return ret; } #endif +#ifdef TARGET_MIPS + case TARGET_PR_GET_FP_MODE: + /* TODO: Implement TARGET_PR_SET_FP_MODE handling.*/ + return -TARGET_EINVAL; + case TARGET_PR_SET_FP_MODE: + /* TODO: Implement TARGET_PR_GET_FP_MODE handling.*/ + return -TARGET_EINVAL; +#endif /* MIPS */ #ifdef TARGET_AARCH64 case TARGET_PR_SVE_SET_VL: /* diff --git a/migration/Makefile.objs b/migration/Makefile.objs index c83ec47ba8..a4f3bafd86 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -1,6 +1,6 @@ common-obj-y += migration.o socket.o fd.o exec.o common-obj-y += tls.o channel.o savevm.o -common-obj-y += colo-comm.o colo.o colo-failover.o +common-obj-y += colo.o colo-failover.o common-obj-y += vmstate.o vmstate-types.o page_cache.o common-obj-y += qemu-file.o global_state.o common-obj-y += qemu-file-channel.o diff --git a/migration/colo-comm.c b/migration/colo-comm.c deleted file mode 100644 index df26e4dfe7..0000000000 --- a/migration/colo-comm.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) - * (a.k.a. Fault Tolerance or Continuous Replication) - * - * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
- * Copyright (c) 2016 FUJITSU LIMITED - * Copyright (c) 2016 Intel Corporation - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * later. See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "migration.h" -#include "migration/colo.h" -#include "migration/vmstate.h" -#include "trace.h" - -typedef struct { - bool colo_requested; -} COLOInfo; - -static COLOInfo colo_info; - -COLOMode get_colo_mode(void) -{ - if (migration_in_colo_state()) { - return COLO_MODE_PRIMARY; - } else if (migration_incoming_in_colo_state()) { - return COLO_MODE_SECONDARY; - } else { - return COLO_MODE_UNKNOWN; - } -} - -static int colo_info_pre_save(void *opaque) -{ - COLOInfo *s = opaque; - - s->colo_requested = migrate_colo_enabled(); - - return 0; -} - -static bool colo_info_need(void *opaque) -{ - return migrate_colo_enabled(); -} - -static const VMStateDescription colo_state = { - .name = "COLOState", - .version_id = 1, - .minimum_version_id = 1, - .pre_save = colo_info_pre_save, - .needed = colo_info_need, - .fields = (VMStateField[]) { - VMSTATE_BOOL(colo_requested, COLOInfo), - VMSTATE_END_OF_LIST() - }, -}; - -void colo_info_init(void) -{ - vmstate_register(NULL, 0, &colo_state, &colo_info); -} - -bool migration_incoming_enable_colo(void) -{ - return colo_info.colo_requested; -} - -void migration_incoming_exit_colo(void) -{ - colo_info.colo_requested = false; -} diff --git a/migration/colo-failover.c b/migration/colo-failover.c index 0ae0c41221..4854a96c92 100644 --- a/migration/colo-failover.c +++ b/migration/colo-failover.c @@ -77,7 +77,7 @@ FailoverStatus failover_get_state(void) void qmp_x_colo_lost_heartbeat(Error **errp) { - if (get_colo_mode() == COLO_MODE_UNKNOWN) { + if (get_colo_mode() == COLO_MODE_NONE) { error_setg(errp, QERR_FEATURE_DISABLED, "colo"); return; } diff --git a/migration/colo.c b/migration/colo.c index 88936f5962..956ac236b7 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -25,8 +25,16 @@ #include "qemu/error-report.h" #include "migration/failover.h" #include "replication.h" +#include "net/colo-compare.h" +#include "net/colo.h" +#include "block/block.h" +#include "qapi/qapi-events-migration.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/cpus.h" +#include "net/filter.h" static bool vmstate_loading; +static Notifier packets_compare_notifier; #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) @@ -53,6 +61,7 @@ static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. 
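/*
 * Illustrative sketch, not part of the patch itself: the VMState-based
 * handshake that colo-comm.c (deleted above) used to carry the
 * "COLO requested" flag is replaced by an explicit migration command,
 * wired up elsewhere in this same diff. Roughly:
 *
 *     // source side, in migration_thread():
 *     if (migrate_colo_enabled()) {
 *         // Notify migration destination that we enable COLO
 *         qemu_savevm_send_colo_enable(s->to_dst_file);
 *     }
 *
 *     // destination side, in loadvm_process_command():
 *     case MIG_CMD_ENABLE_COLO:
 *         return loadvm_process_enable_colo(mis);
 */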
@@ -70,6 +79,17 @@ static void secondary_vm_do_failover(void) migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + replication_stop_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + + /* Notify all filters of all NICs to do checkpoint */ + colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err); + if (local_err) { + error_report_err(local_err); + } + if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ @@ -107,9 +127,15 @@ static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; + Error *local_err = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + /* + * Kick the COLO thread, which might be waiting in + * qemu_sem_wait(&s->colo_checkpoint_sem). + */ + colo_checkpoint_notify(migrate_get_current()); /* * Wake up COLO thread which may blocked in recv() or send(), @@ -130,10 +156,28 @@ static void primary_vm_do_failover(void) FailoverStatus_str(old_state)); return; } + + replication_stop_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + local_err = NULL; + } + /* Notify COLO thread that failover work is finished */ qemu_sem_post(&s->colo_exit_sem); } +COLOMode get_colo_mode(void) +{ + if (migration_in_colo_state()) { + return COLO_MODE_PRIMARY; + } else if (migration_incoming_in_colo_state()) { + return COLO_MODE_SECONDARY; + } else { + return COLO_MODE_NONE; + } +} + void colo_do_failover(MigrationState *s) { /* Make sure VM stopped while failover happened. */ @@ -207,6 +251,26 @@ void qmp_xen_colo_do_checkpoint(Error **errp) #endif } +COLOStatus *qmp_query_colo_status(Error **errp) +{ + COLOStatus *s = g_new0(COLOStatus, 1); + + s->mode = get_colo_mode(); + + switch (failover_get_state()) { + case FAILOVER_STATUS_NONE: + s->reason = COLO_EXIT_REASON_NONE; + break; + case FAILOVER_STATUS_REQUIRE: + s->reason = COLO_EXIT_REASON_REQUEST; + break; + default: + s->reason = COLO_EXIT_REASON_ERROR; + } + + return s; +} + static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { @@ -343,21 +407,42 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); + if (local_err) { + goto out; + } + /* Disable block migration */ migrate_set_block_enabled(false, &local_err); - qemu_savevm_state_header(fb); - qemu_savevm_state_setup(fb); qemu_mutex_lock_iothread(); - qemu_savevm_state_complete_precopy(fb, false, false); - qemu_mutex_unlock_iothread(); - - qemu_fflush(fb); + replication_do_checkpoint_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + /* Note: device state is saved into buffer */ + ret = qemu_save_device_state(fb); + + qemu_mutex_unlock_iothread(); + if (ret < 0) { goto out; } /* + * Only save the VM's live state, which does not include the device + * state. TODO: We may need a timeout mechanism to prevent the COLO + * process from blocking here. + */ + qemu_savevm_live_state(s->to_dst_file); + + qemu_fflush(fb); + + /* * We need the size of the VMstate data in Secondary side, * With which we can decide how much data should be read.
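+ *
+ * For orientation, one checkpoint round trip then looks roughly like
+ * this (a sketch of the message flow used by this file; not a
+ * normative protocol spec):
+ *
+ *     Primary                               Secondary
+ *     COLO_MESSAGE_CHECKPOINT_REQUEST  -->
+ *                                      <--  COLO_MESSAGE_CHECKPOINT_REPLY
+ *     COLO_MESSAGE_VMSTATE_SEND        -->
+ *     live RAM + buffered device state -->
+ *     COLO_MESSAGE_VMSTATE_SIZE        -->
+ *                                      <--  COLO_MESSAGE_VMSTATE_RECEIVED
+ *                                      <--  COLO_MESSAGE_VMSTATE_LOADED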
*/ @@ -400,6 +485,11 @@ out: return ret; } +static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) +{ + colo_checkpoint_notify(data); +} + static void colo_process_checkpoint(MigrationState *s) { QIOChannelBuffer *bioc; @@ -416,6 +506,9 @@ static void colo_process_checkpoint(MigrationState *s) goto out; } + packets_compare_notifier.notify = colo_compare_notify_checkpoint; + colo_compare_register_notifier(&packets_compare_notifier); + /* * Wait for Secondary finish loading VM states and enter COLO * restore. @@ -430,6 +523,12 @@ static void colo_process_checkpoint(MigrationState *s) object_unref(OBJECT(bioc)); qemu_mutex_lock_iothread(); + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); @@ -445,6 +544,9 @@ static void colo_process_checkpoint(MigrationState *s) qemu_sem_wait(&s->colo_checkpoint_sem); + if (s->state != MIGRATION_STATUS_COLO) { + goto out; + } ret = colo_do_checkpoint_transaction(s, bioc, fb); if (ret < 0) { goto out; @@ -461,11 +563,38 @@ out: qemu_fclose(fb); } - timer_del(s->colo_delay_timer); + /* + * There are only two reasons we can get here: some error happened, + * or the user triggered failover. + */ + switch (failover_get_state()) { + case FAILOVER_STATUS_NONE: + qapi_event_send_colo_exit(COLO_MODE_PRIMARY, + COLO_EXIT_REASON_ERROR); + break; + case FAILOVER_STATUS_REQUIRE: + qapi_event_send_colo_exit(COLO_MODE_PRIMARY, + COLO_EXIT_REASON_REQUEST); + break; + default: + abort(); + } /* Hope this not to be too long to wait here */ qemu_sem_wait(&s->colo_exit_sem); qemu_sem_destroy(&s->colo_exit_sem); + + /* + * It is safe to unregister the notifier after failover has finished. + * Besides, colo_delay_timer and colo_checkpoint_sem can't be + * released before unregistering the notifier, or there will be a + * use-after-free error. 
+ */ + colo_compare_unregister_notifier(&packets_compare_notifier); + timer_del(s->colo_delay_timer); + timer_free(s->colo_delay_timer); + qemu_sem_destroy(&s->colo_checkpoint_sem); + /* * Must be called after failover BH is completed, * Or the failover BH may shutdown the wrong fd that @@ -533,6 +662,7 @@ void *colo_process_incoming_thread(void *opaque) uint64_t total_size; uint64_t value; Error *local_err = NULL; + int ret; rcu_register_thread(); qemu_sem_init(&mis->colo_incoming_sem, 0); @@ -559,6 +689,16 @@ void *colo_process_incoming_thread(void *opaque) fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); + qemu_mutex_lock_iothread(); + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + vm_start(); + trace_colo_vm_state_change("stop", "run"); + qemu_mutex_unlock_iothread(); + colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, &local_err); if (local_err) { @@ -578,6 +718,11 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + vm_stop_force_state(RUN_STATE_COLO); + trace_colo_vm_state_change("run", "stop"); + qemu_mutex_unlock_iothread(); + /* FIXME: This is unnecessary for periodic checkpoint mode */ colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); @@ -591,6 +736,16 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + cpu_synchronize_all_pre_loadvm(); + ret = qemu_loadvm_state_main(mis->from_src_file, mis); + qemu_mutex_unlock_iothread(); + + if (ret < 0) { + error_report("Load VM's live state (ram) error"); + goto out; + } + value = colo_receive_message_value(mis->from_src_file, COLO_MESSAGE_VMSTATE_SIZE, &local_err); if (local_err) { @@ -622,15 +777,37 @@ void *colo_process_incoming_thread(void *opaque) } qemu_mutex_lock_iothread(); - qemu_system_reset(SHUTDOWN_CAUSE_NONE); vmstate_loading = true; - if (qemu_loadvm_state(fb) < 0) { - error_report("COLO: loadvm failed"); + ret = qemu_load_device_state(fb); + if (ret < 0) { + error_report("COLO: load device state failed"); + qemu_mutex_unlock_iothread(); + goto out; + } + + replication_get_error_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + /* discard colo disk buffer */ + replication_do_checkpoint_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + + /* Notify all filters of all NIC to do checkpoint */ + colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); + + if (local_err) { qemu_mutex_unlock_iothread(); goto out; } vmstate_loading = false; + vm_start(); + trace_colo_vm_state_change("stop", "run"); qemu_mutex_unlock_iothread(); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { @@ -654,6 +831,19 @@ out: error_report_err(local_err); } + switch (failover_get_state()) { + case FAILOVER_STATUS_NONE: + qapi_event_send_colo_exit(COLO_MODE_SECONDARY, + COLO_EXIT_REASON_ERROR); + break; + case FAILOVER_STATUS_REQUIRE: + qapi_event_send_colo_exit(COLO_MODE_SECONDARY, + COLO_EXIT_REASON_REQUEST); + break; + default: + abort(); + } + if (fb) { qemu_fclose(fb); } @@ -665,7 +855,7 @@ out: if (mis->to_src_file) { qemu_fclose(mis->to_src_file); } - migration_incoming_exit_colo(); + migration_incoming_disable_colo(); rcu_unregister_thread(); return NULL; diff --git a/migration/migration.c b/migration/migration.c index d6ae879dc8..7696729340 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -76,10 +76,8 @@ /* 
Migration XBZRLE default cache size */ #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -/* The delay time (in ms) between two COLO checkpoints - * Note: Please change this default value to 10000 when we support hybrid mode. - */ -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 +/* The delay time (in ms) between two COLO checkpoints */ +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 @@ -298,6 +296,22 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname, return migrate_send_rp_message(mis, msg_type, msglen, bufc); } +static bool migration_colo_enabled; +bool migration_incoming_colo_enabled(void) +{ + return migration_colo_enabled; +} + +void migration_incoming_disable_colo(void) +{ + migration_colo_enabled = false; +} + +void migration_incoming_enable_colo(void) +{ + migration_colo_enabled = true; +} + void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; @@ -388,6 +402,7 @@ static void process_incoming_migration_co(void *opaque) MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyState ps; int ret; + Error *local_err = NULL; assert(mis->from_src_file); mis->migration_incoming_co = qemu_coroutine_self(); @@ -419,7 +434,21 @@ static void process_incoming_migration_co(void *opaque) } /* we get COLO info, and know if we are in COLO mode */ - if (!ret && migration_incoming_enable_colo()) { + if (!ret && migration_incoming_colo_enabled()) { + /* Make sure all file formats flush their mutable metadata */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); + error_report_err(local_err); + exit(EXIT_FAILURE); + } + + if (colo_init_ram_cache() < 0) { + error_report("Init ram cache failed"); + exit(EXIT_FAILURE); + } + qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); mis->have_colo_incoming_thread = true; @@ -427,6 +456,8 @@ static void process_incoming_migration_co(void *opaque) /* Wait checkpoint incoming thread exit before free resource */ qemu_thread_join(&mis->colo_incoming_thread); + /* We hold the global iothread lock, so it is safe here */ + colo_release_ram_cache(); } if (ret < 0) { @@ -3017,6 +3048,11 @@ static void *migration_thread(void *opaque) qemu_savevm_send_postcopy_advise(s->to_dst_file); } + if (migrate_colo_enabled()) { + /* Notify migration destination that we enable COLO */ + qemu_savevm_send_colo_enable(s->to_dst_file); + } + qemu_savevm_state_setup(s->to_dst_file); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; diff --git a/migration/ram.c b/migration/ram.c index bc38d98cc3..7e7deec4d8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3447,6 +3447,29 @@ static inline void *host_from_ram_block_offset(RAMBlock *block, return block->host + offset; } +static inline void *colo_cache_from_block_offset(RAMBlock *block, + ram_addr_t offset) +{ + if (!offset_in_ramblock(block, offset)) { + return NULL; + } + if (!block->colo_cache) { + error_report("%s: colo_cache is NULL in block :%s", + __func__, block->idstr); + return NULL; + } + + /* + * During colo checkpoint, we need bitmap of these migrated pages. + * It help us to decide which pages in ram cache should be flushed + * into VM's RAM later. 
+ */ + if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) { + ram_state->migration_dirty_pages++; + } + return block->colo_cache + offset; +} + /** * ram_handle_compressed: handle the zero page case * @@ -3651,6 +3674,88 @@ static void decompress_data_with_multi_threads(QEMUFile *f, qemu_mutex_unlock(&decomp_done_lock); } +/* + * colo cache: this is for the secondary VM; we cache the whole + * memory of the secondary VM. The global lock must be held when + * calling this helper. + */ +int colo_init_ram_cache(void) +{ + RAMBlock *block; + + rcu_read_lock(); + RAMBLOCK_FOREACH_MIGRATABLE(block) { + block->colo_cache = qemu_anon_ram_alloc(block->used_length, + NULL, + false); + if (!block->colo_cache) { + error_report("%s: Can't alloc memory for COLO cache of block %s, " + "size 0x" RAM_ADDR_FMT, __func__, block->idstr, + block->used_length); + goto out_locked; + } + memcpy(block->colo_cache, block->host, block->used_length); + } + rcu_read_unlock(); + /* + * Record the dirty pages sent by the PVM; we use this dirty bitmap to + * decide which pages in the cache should be flushed into the SVM's RAM. + * Here we use the same name 'ram_bitmap' as for migration. + */ + if (ram_bytes_total()) { + RAMBlock *block; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + unsigned long pages = block->max_length >> TARGET_PAGE_BITS; + + block->bmap = bitmap_new(pages); + bitmap_set(block->bmap, 0, pages); + } + } + ram_state = g_new0(RAMState, 1); + ram_state->migration_dirty_pages = 0; + memory_global_dirty_log_start(); + + return 0; + +out_locked: + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + + rcu_read_unlock(); + return -errno; +} + +/* The global lock must be held when calling this helper */ +void colo_release_ram_cache(void) +{ + RAMBlock *block; + + memory_global_dirty_log_stop(); + RAMBLOCK_FOREACH_MIGRATABLE(block) { + g_free(block->bmap); + block->bmap = NULL; + } + + rcu_read_lock(); + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (block->colo_cache) { + qemu_anon_ram_free(block->colo_cache, block->used_length); + block->colo_cache = NULL; + } + } + + rcu_read_unlock(); + g_free(ram_state); + ram_state = NULL; +} + /** * ram_load_setup: Setup RAM for migration incoming side * @@ -3667,6 +3772,7 @@ static int ram_load_setup(QEMUFile *f, void *opaque) xbzrle_load_setup(); ramblock_recv_map_init(); + return 0; } @@ -3687,6 +3793,7 @@ static int ram_load_cleanup(void *opaque) g_free(rb->receivedmap); rb->receivedmap = NULL; } + return 0; } @@ -3869,6 +3976,46 @@ static bool postcopy_is_running(void) return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END; } +/* + * Flush the contents of the RAM cache into the SVM's memory. + * Only flush the pages that were dirtied by the PVM or the SVM, or both. 
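+ *
+ * As an illustration of the loop below: after memory_global_dirty_log_sync()
+ * the per-block bitmap covers both the pages the PVM has sent since the
+ * last checkpoint and the pages the SVM dirtied while running; each set
+ * bit is cleared and the corresponding TARGET_PAGE_SIZE chunk is copied
+ * from block->colo_cache back into block->host.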
+ */ +static void colo_flush_ram_cache(void) +{ + RAMBlock *block = NULL; + void *dst_host; + void *src_host; + unsigned long offset = 0; + + memory_global_dirty_log_sync(); + rcu_read_lock(); + RAMBLOCK_FOREACH_MIGRATABLE(block) { + migration_bitmap_sync_range(ram_state, block, 0, block->used_length); + } + rcu_read_unlock(); + + trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages); + rcu_read_lock(); + block = QLIST_FIRST_RCU(&ram_list.blocks); + + while (block) { + offset = migration_bitmap_find_dirty(ram_state, block, offset); + + if (offset << TARGET_PAGE_BITS >= block->used_length) { + offset = 0; + block = QLIST_NEXT_RCU(block, next); + } else { + migration_bitmap_clear_dirty(ram_state, block, offset); + dst_host = block->host + (offset << TARGET_PAGE_BITS); + src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); + memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + } + } + + rcu_read_unlock(); + trace_colo_flush_ram_cache_end(); +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0, invalid_flags = 0; @@ -3924,13 +4071,24 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { RAMBlock *block = ram_block_from_stream(f, flags); - host = host_from_ram_block_offset(block, addr); + /* + * After going into COLO, we should load the Page into colo_cache. + */ + if (migration_incoming_in_colo_state()) { + host = colo_cache_from_block_offset(block, addr); + } else { + host = host_from_ram_block_offset(block, addr); + } if (!host) { error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); ret = -EINVAL; break; } - ramblock_recv_bitmap_set(block, host); + + if (!migration_incoming_in_colo_state()) { + ramblock_recv_bitmap_set(block, host); + } + trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); } @@ -4034,6 +4192,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret |= wait_for_decompress_done(); rcu_read_unlock(); trace_ram_load_complete(ret, seq_iter); + + if (!ret && migration_incoming_in_colo_state()) { + colo_flush_ram_cache(); + } return ret; } diff --git a/migration/ram.h b/migration/ram.h index a139066846..83ff1bc11a 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -71,4 +71,8 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); +/* ram cache */ +int colo_init_ram_cache(void); +void colo_release_ram_cache(void); + #endif diff --git a/migration/savevm.c b/migration/savevm.c index 2d10e45582..e4caff9a6a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -56,6 +56,7 @@ #include "io/channel-file.h" #include "sysemu/replay.h" #include "qjson.h" +#include "migration/colo.h" #ifndef ETH_P_RARP #define ETH_P_RARP 0x8035 @@ -82,6 +83,7 @@ enum qemu_vm_cmd { were previously sent during precopy but are dirty. 
*/ MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ + MIG_CMD_ENABLE_COLO, /* Enable COLO */ MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ MIG_CMD_MAX @@ -841,6 +843,12 @@ static void qemu_savevm_command_send(QEMUFile *f, qemu_fflush(f); } +void qemu_savevm_send_colo_enable(QEMUFile *f) +{ + trace_savevm_send_colo_enable(); + qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL); +} + void qemu_savevm_send_ping(QEMUFile *f, uint32_t value) { uint32_t buf; @@ -1370,13 +1378,21 @@ done: return ret; } -static int qemu_save_device_state(QEMUFile *f) +void qemu_savevm_live_state(QEMUFile *f) { - SaveStateEntry *se; + /* save QEMU_VM_SECTION_END section */ + qemu_savevm_state_complete_precopy(f, true, false); + qemu_put_byte(f, QEMU_VM_EOF); +} - qemu_put_be32(f, QEMU_VM_FILE_MAGIC); - qemu_put_be32(f, QEMU_VM_FILE_VERSION); +int qemu_save_device_state(QEMUFile *f) +{ + SaveStateEntry *se; + if (!migration_in_colo_state()) { + qemu_put_be32(f, QEMU_VM_FILE_MAGIC); + qemu_put_be32(f, QEMU_VM_FILE_VERSION); + } cpu_synchronize_all_states(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { @@ -1432,8 +1448,6 @@ enum LoadVMExitCodes { LOADVM_QUIT = 1, }; -static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); - /* ------ incoming postcopy messages ------ */ /* 'advise' arrives before any transfers just to tell us that a postcopy * *might* happen - it might be skipped if precopy transferred everything @@ -1922,6 +1936,12 @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis, return 0; } +static int loadvm_process_enable_colo(MigrationIncomingState *mis) +{ + migration_incoming_enable_colo(); + return colo_init_ram_cache(); +} + /* * Process an incoming 'QEMU_VM_COMMAND' * 0 just a normal return @@ -2001,6 +2021,9 @@ static int loadvm_process_command(QEMUFile *f) case MIG_CMD_RECV_BITMAP: return loadvm_handle_recv_bitmap(mis, len); + + case MIG_CMD_ENABLE_COLO: + return loadvm_process_enable_colo(mis); } return 0; @@ -2230,7 +2253,7 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) return true; } -static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) { uint8_t section_type; int ret = 0; @@ -2401,6 +2424,22 @@ int qemu_loadvm_state(QEMUFile *f) return ret; } +int qemu_load_device_state(QEMUFile *f) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + int ret; + + /* Load QEMU_VM_SECTION_FULL section */ + ret = qemu_loadvm_state_main(f, mis); + if (ret < 0) { + error_report("Failed to load device state: %d", ret); + return ret; + } + + cpu_synchronize_all_post_init(); + return 0; +} + int save_snapshot(const char *name, Error **errp) { BlockDriverState *bs, *bs1; diff --git a/migration/savevm.h b/migration/savevm.h index a5e65b8ae3..51a4b9caa8 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -55,8 +55,13 @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, uint16_t len, uint64_t *start_list, uint64_t *length_list); +void qemu_savevm_send_colo_enable(QEMUFile *f); +void qemu_savevm_live_state(QEMUFile *f); +int qemu_save_device_state(QEMUFile *f); int qemu_loadvm_state(QEMUFile *f); void qemu_loadvm_state_cleanup(void); +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); +int qemu_load_device_state(QEMUFile *f); #endif diff --git a/migration/trace-events b/migration/trace-events index 
9430f3cbe0..bd2d0cd25a 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -37,6 +37,7 @@ savevm_send_ping(uint32_t val) "0x%x" savevm_send_postcopy_listen(void) "" savevm_send_postcopy_run(void) "" savevm_send_postcopy_resume(void) "" +savevm_send_colo_enable(void) "" savevm_send_recv_bitmap(char *name) "%s" savevm_state_setup(void) "" savevm_state_resume_prepare(void) "" @@ -101,6 +102,8 @@ ram_dirty_bitmap_sync_start(void) "" ram_dirty_bitmap_sync_wait(void) "" ram_dirty_bitmap_sync_complete(void) "" ram_state_resume_prepare(uint64_t v) "%" PRId64 +colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64 +colo_flush_ram_cache_end(void) "" # migration/migration.c await_return_path_close_on_source_close(void) "" @@ -83,6 +83,7 @@ #include "sysemu/cpus.h" #include "sysemu/iothread.h" #include "qemu/cutils.h" +#include "tcg/tcg.h" #if defined(TARGET_S390X) #include "hw/s390x/storage-keys.h" @@ -1966,16 +1967,22 @@ static void hmp_info_numa(Monitor *mon, const QDict *qdict) #ifdef CONFIG_PROFILER -int64_t tcg_time; int64_t dev_time; static void hmp_info_profile(Monitor *mon, const QDict *qdict) { + static int64_t last_cpu_exec_time; + int64_t cpu_exec_time; + int64_t delta; + + cpu_exec_time = tcg_cpu_exec_time(); + delta = cpu_exec_time - last_cpu_exec_time; + monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", - tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND); - tcg_time = 0; + delta, delta / (double)NANOSECONDS_PER_SECOND); + last_cpu_exec_time = cpu_exec_time; dev_time = 0; } #else diff --git a/net/colo-compare.c b/net/colo-compare.c index dd745a491b..a39191d522 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -27,11 +27,20 @@ #include "qemu/sockets.h" #include "colo.h" #include "sysemu/iothread.h" +#include "net/colo-compare.h" +#include "migration/colo.h" +#include "migration/migration.h" #define TYPE_COLO_COMPARE "colo-compare" #define COLO_COMPARE(obj) \ OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) +static QTAILQ_HEAD(, CompareState) net_compares = + QTAILQ_HEAD_INITIALIZER(net_compares); + +static NotifierList colo_compare_notifiers = + NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers); + #define COMPARE_READ_LEN_MAX NET_BUFSIZE #define MAX_QUEUE_SIZE 1024 @@ -41,6 +50,10 @@ /* TODO: Should be configurable */ #define REGULAR_PACKET_CHECK_MS 3000 +static QemuMutex event_mtx; +static QemuCond event_complete_cond; +static int event_unhandled_count; + /* * + CompareState ++ * | | @@ -87,6 +100,11 @@ typedef struct CompareState { IOThread *iothread; GMainContext *worker_context; QEMUTimer *packet_check_timer; + + QEMUBH *event_bh; + enum colo_event event; + + QTAILQ_ENTRY(CompareState) next; } CompareState; typedef struct CompareClass { @@ -98,6 +116,12 @@ enum { SECONDARY_IN, }; +static void colo_compare_inconsistency_notify(void) +{ + notifier_list_notify(&colo_compare_notifiers, + migrate_get_current()); +} + static int compare_chr_send(CompareState *s, const uint8_t *buf, uint32_t size, @@ -413,10 +437,7 @@ sec: qemu_hexdump((char *)spkt->data, stderr, "colo-compare spkt", spkt->size); - /* - * colo_compare_inconsistent_notify(); - * TODO: notice to checkpoint(); - */ + colo_compare_inconsistency_notify(); } } @@ -547,8 +568,18 @@ static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) } } +void colo_compare_register_notifier(Notifier *notify) +{ + notifier_list_add(&colo_compare_notifiers, notify); +} + 
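/*
 * Usage sketch, not part of the patch itself: migration/colo.c (elsewhere
 * in this diff) registers a Notifier whose callback requests a checkpoint
 * whenever colo-compare reports inconsistent packets:
 *
 *     static Notifier packets_compare_notifier;
 *
 *     static void colo_compare_notify_checkpoint(Notifier *notifier,
 *                                                void *data)
 *     {
 *         colo_checkpoint_notify(data);
 *     }
 *
 *     packets_compare_notifier.notify = colo_compare_notify_checkpoint;
 *     colo_compare_register_notifier(&packets_compare_notifier);
 *     ...
 *     colo_compare_unregister_notifier(&packets_compare_notifier);
 */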
+void colo_compare_unregister_notifier(Notifier *notify) +{ + notifier_remove(notify); +} + static int colo_old_packet_check_one_conn(Connection *conn, - void *user_data) + void *user_data) { GList *result = NULL; int64_t check_time = REGULAR_PACKET_CHECK_MS; @@ -559,10 +590,7 @@ static int colo_old_packet_check_one_conn(Connection *conn, if (result) { /* Do checkpoint will flush old packet */ - /* - * TODO: Notify colo frame to do checkpoint. - * colo_compare_inconsistent_notify(); - */ + colo_compare_inconsistency_notify(); return 0; } @@ -606,11 +634,12 @@ static void colo_compare_packet(CompareState *s, Connection *conn, /* * If one packet arrive late, the secondary_list or * primary_list will be empty, so we can't compare it - * until next comparison. + * until the next comparison. If the packets in the list + * time out, a checkpoint request will be triggered. */ trace_colo_compare_main("packet different"); g_queue_push_head(&conn->primary_list, pkt); - /* TODO: colo_notify_checkpoint();*/ + colo_compare_inconsistency_notify(); break; } } @@ -736,6 +765,25 @@ static void check_old_packet_regular(void *opaque) REGULAR_PACKET_CHECK_MS); } +/* Public API, used by the COLO frame to notify colo-compare of events */ +void colo_notify_compares_event(void *opaque, int event, Error **errp) +{ + CompareState *s; + + qemu_mutex_lock(&event_mtx); + QTAILQ_FOREACH(s, &net_compares, next) { + s->event = event; + qemu_bh_schedule(s->event_bh); + event_unhandled_count++; + } + /* Wait for all compare threads to finish handling this event */ + while (event_unhandled_count > 0) { + qemu_cond_wait(&event_complete_cond, &event_mtx); + } + + qemu_mutex_unlock(&event_mtx); +} + static void colo_compare_timer_init(CompareState *s) { AioContext *ctx = iothread_get_aio_context(s->iothread); @@ -756,6 +804,30 @@ static void colo_compare_timer_del(CompareState *s) } } +static void colo_flush_packets(void *opaque, void *user_data); + +static void colo_compare_handle_event(void *opaque) +{ + CompareState *s = opaque; + + switch (s->event) { + case COLO_EVENT_CHECKPOINT: + g_queue_foreach(&s->conn_list, colo_flush_packets, s); + break; + case COLO_EVENT_FAILOVER: + break; + default: + break; + } + + assert(event_unhandled_count > 0); + + qemu_mutex_lock(&event_mtx); + event_unhandled_count--; + qemu_cond_broadcast(&event_complete_cond); + qemu_mutex_unlock(&event_mtx); +} + static void colo_compare_iothread(CompareState *s) { object_ref(OBJECT(s->iothread)); @@ -769,6 +841,7 @@ static void colo_compare_iothread(CompareState *s) s, s->worker_context, true); colo_compare_timer_init(s); + s->event_bh = qemu_bh_new(colo_compare_handle_event, s); } static char *compare_get_pri_indev(Object *obj, Error **errp) @@ -926,8 +999,13 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr); net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr); + QTAILQ_INSERT_TAIL(&net_compares, s, next); + g_queue_init(&s->conn_list); + qemu_mutex_init(&event_mtx); + qemu_cond_init(&event_complete_cond); + s->connection_track_table = g_hash_table_new_full(connection_key_hash, connection_key_equal, g_free, @@ -990,6 +1068,7 @@ static void colo_compare_init(Object *obj) static void colo_compare_finalize(Object *obj) { CompareState *s = COLO_COMPARE(obj); + CompareState *tmp = NULL; qemu_chr_fe_deinit(&s->chr_pri_in, false); qemu_chr_fe_deinit(&s->chr_sec_in, false); @@ -997,6 +1076,16 @@ static void colo_compare_finalize(Object *obj) if (s->iothread) { 
colo_compare_timer_del(s); } + + qemu_bh_delete(s->event_bh); + + QTAILQ_FOREACH(tmp, &net_compares, next) { + if (tmp == s) { + QTAILQ_REMOVE(&net_compares, s, next); + break; + } + } + /* Release all unhandled packets after compare thead exited */ g_queue_foreach(&s->conn_list, colo_flush_packets, s); @@ -1009,6 +1098,10 @@ static void colo_compare_finalize(Object *obj) if (s->iothread) { object_unref(OBJECT(s->iothread)); } + + qemu_mutex_destroy(&event_mtx); + qemu_cond_destroy(&event_complete_cond); + g_free(s->pri_indev); g_free(s->sec_indev); g_free(s->outdev); diff --git a/net/colo-compare.h b/net/colo-compare.h new file mode 100644 index 0000000000..22ddd512e2 --- /dev/null +++ b/net/colo-compare.h @@ -0,0 +1,24 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2017 FUJITSU LIMITED + * Copyright (c) 2017 Intel Corporation + * + * Authors: + * zhanghailiang <zhang.zhanghailiang@huawei.com> + * Zhang Chen <zhangckid@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_COLO_COMPARE_H +#define QEMU_COLO_COMPARE_H + +void colo_notify_compares_event(void *opaque, int event, Error **errp); +void colo_compare_register_notifier(Notifier *notify); +void colo_compare_unregister_notifier(Notifier *notify); + +#endif /* QEMU_COLO_COMPARE_H */ diff --git a/net/colo.c b/net/colo.c index 6dda4ed66e..49176bf07b 100644 --- a/net/colo.c +++ b/net/colo.c @@ -137,7 +137,7 @@ Connection *connection_new(ConnectionKey *key) conn->ip_proto = key->ip_proto; conn->processing = false; conn->offset = 0; - conn->syn_flag = 0; + conn->tcp_state = TCPS_CLOSED; conn->pack = 0; conn->sack = 0; g_queue_init(&conn->primary_list); @@ -221,3 +221,11 @@ Connection *connection_get(GHashTable *connection_track_table, return conn; } + +bool connection_has_tracked(GHashTable *connection_track_table, + ConnectionKey *key) +{ + Connection *conn = g_hash_table_lookup(connection_track_table, key); + + return conn ? 
true : false; +} diff --git a/net/colo.h b/net/colo.h index da6c36dcf7..11c5226488 100644 --- a/net/colo.h +++ b/net/colo.h @@ -18,6 +18,7 @@ #include "slirp/slirp.h" #include "qemu/jhash.h" #include "qemu/timer.h" +#include "slirp/tcp.h" #define HASHTABLE_MAX_SIZE 16384 @@ -81,11 +82,9 @@ typedef struct Connection { uint32_t sack; /* offset = secondary_seq - primary_seq */ tcp_seq offset; - /* - * we use this flag update offset func - * run once in independent tcp connection - */ - int syn_flag; + + int tcp_state; /* TCP FSM state */ + tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */ } Connection; uint32_t connection_key_hash(const void *opaque); @@ -99,6 +98,8 @@ void connection_destroy(void *opaque); Connection *connection_get(GHashTable *connection_track_table, ConnectionKey *key, GQueue *conn_list); +bool connection_has_tracked(GHashTable *connection_track_table, + ConnectionKey *key); void connection_hashtable_reset(GHashTable *connection_track_table); Packet *packet_new(const void *data, int size, int vnet_hdr_len); void packet_destroy(void *opaque, void *user_data); diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c index f584e4eba4..bb8f4d93b1 100644 --- a/net/filter-rewriter.c +++ b/net/filter-rewriter.c @@ -20,11 +20,15 @@ #include "qemu/main-loop.h" #include "qemu/iov.h" #include "net/checksum.h" +#include "net/colo.h" +#include "migration/colo.h" #define FILTER_COLO_REWRITER(obj) \ OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER) #define TYPE_FILTER_REWRITER "filter-rewriter" +#define FAILOVER_MODE_ON true +#define FAILOVER_MODE_OFF false typedef struct RewriterState { NetFilterState parent_obj; @@ -32,8 +36,14 @@ typedef struct RewriterState { /* hashtable to save connection */ GHashTable *connection_track_table; bool vnet_hdr; + bool failover_mode; } RewriterState; +static void filter_rewriter_failover_mode(RewriterState *s) +{ + s->failover_mode = FAILOVER_MODE_ON; +} + static void filter_rewriter_flush(NetFilterState *nf) { RewriterState *s = FILTER_COLO_REWRITER(nf); @@ -59,9 +69,9 @@ static int is_tcp_packet(Packet *pkt) } /* handle tcp packet from primary guest */ -static int handle_primary_tcp_pkt(NetFilterState *nf, +static int handle_primary_tcp_pkt(RewriterState *rf, Connection *conn, - Packet *pkt) + Packet *pkt, ConnectionKey *key) { struct tcphdr *tcp_pkt; @@ -74,23 +84,28 @@ static int handle_primary_tcp_pkt(NetFilterState *nf, trace_colo_filter_rewriter_conn_offset(conn->offset); } + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) && + conn->tcp_state == TCPS_SYN_SENT) { + conn->tcp_state = TCPS_ESTABLISHED; + } + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { /* * we use this flag update offset func * run once in independent tcp connection */ - conn->syn_flag = 1; + conn->tcp_state = TCPS_SYN_RECEIVED; } if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) { - if (conn->syn_flag) { + if (conn->tcp_state == TCPS_SYN_RECEIVED) { /* * offset = secondary_seq - primary seq * ack packet sent by guest from primary node, * so we use th_ack - 1 get primary_seq */ conn->offset -= (ntohl(tcp_pkt->th_ack) - 1); - conn->syn_flag = 0; + conn->tcp_state = TCPS_ESTABLISHED; } if (conn->offset) { /* handle packets to the secondary from the primary */ @@ -99,15 +114,66 @@ static int handle_primary_tcp_pkt(NetFilterState *nf, net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len, pkt->size - pkt->vnet_hdr_len); } + + /* + * Passive close step 3 + */ + if ((conn->tcp_state == TCPS_LAST_ACK) && + 
(ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) { + conn->tcp_state = TCPS_CLOSED; + g_hash_table_remove(rf->connection_track_table, key); + } + } + + if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) { + /* + * Passive close. + * Step 1: + * The *server* side of this connection is the VM; the *client* + * tries to close the connection. We will enter CLOSE_WAIT status. + * + * Step 2: + * In this step we will enter LAST_ACK status. + * + * We got the 'fin=1, ack=1' packet from the server side; we need + * to record the seq of this 'fin=1, ack=1' packet. + * + * Step 3: + * We got an 'ack=1' packet from the client side; it acks the + * 'fin=1, ack=1' packet from the server side. From this point on, + * we can ensure that there will be no more packets in the + * connection, unless some error happens on the path between the + * 'filter object' and the vNIC. If this rare case really happens, + * we can still create a new connection, so it is safe to remove + * the connection from connection_track_table. + * + */ + if (conn->tcp_state == TCPS_ESTABLISHED) { + conn->tcp_state = TCPS_CLOSE_WAIT; + } + + /* + * Active close step 2. + */ + if (conn->tcp_state == TCPS_FIN_WAIT_1) { + conn->tcp_state = TCPS_TIME_WAIT; + /* + * To simplify the implementation, we needn't wait the 2MSL time + * in the filter rewriter, because the guest kernel will track + * the TCP status and wait the 2MSL time itself; if the client + * resends the FIN packet, the guest will reply with the last + * ACK again. + */ + conn->tcp_state = TCPS_CLOSED; + g_hash_table_remove(rf->connection_track_table, key); + } } return 0; } /* handle tcp packet from secondary guest */ -static int handle_secondary_tcp_pkt(NetFilterState *nf, +static int handle_secondary_tcp_pkt(RewriterState *rf, Connection *conn, - Packet *pkt) + Packet *pkt, ConnectionKey *key) { struct tcphdr *tcp_pkt; @@ -121,7 +187,8 @@ trace_colo_filter_rewriter_conn_offset(conn->offset); } - if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { + if (conn->tcp_state == TCPS_SYN_RECEIVED && + ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { /* * save offset = secondary_seq and then * in handle_primary_tcp_pkt make offset @@ -130,6 +197,12 @@ conn->offset = ntohl(tcp_pkt->th_seq); } + /* VM actively opens a connection */ + if (conn->tcp_state == TCPS_CLOSED && + ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { + conn->tcp_state = TCPS_SYN_SENT; + } + if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) { /* Only need to adjust seq while offset is Non-zero */ if (conn->offset) { @@ -141,6 +214,32 @@ } } + /* + * Passive close step 2: + */ + if (conn->tcp_state == TCPS_CLOSE_WAIT && + (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) { + conn->fin_ack_seq = ntohl(tcp_pkt->th_seq); + conn->tcp_state = TCPS_LAST_ACK; + } + + /* + * Active close. + * + * Step 1: + * The *server* side of this connection is the VM; the *server* + * tries to close the connection. + * + * Step 2: + * We will enter FIN_WAIT_1 status. + * We simplify away the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and + * CLOSING statuses. 
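+ *
+ * A rough map of the states tracked by this filter (a simplification
+ * of the full TCP FSM, matching the transitions coded in this file):
+ *
+ *     open:  CLOSED -> SYN_SENT     -> ESTABLISHED  (VM connects out)
+ *            CLOSED -> SYN_RECEIVED -> ESTABLISHED  (peer connects in)
+ *     close: ESTABLISHED -> CLOSE_WAIT -> LAST_ACK  -> CLOSED  (passive)
+ *            ESTABLISHED -> FIN_WAIT_1 -> TIME_WAIT -> CLOSED  (active)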
+ */ + if (conn->tcp_state == TCPS_ESTABLISHED && + (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) { + conn->tcp_state = TCPS_FIN_WAIT_1; + } + return 0; } @@ -184,13 +283,20 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, */ reverse_connection_key(&key); } + + /* After failover we needn't change new TCP packet */ + if (s->failover_mode && + !connection_has_tracked(s->connection_track_table, &key)) { + goto out; + } + conn = connection_get(s->connection_track_table, &key, NULL); if (sender == nf->netdev) { /* NET_FILTER_DIRECTION_TX */ - if (!handle_primary_tcp_pkt(nf, conn, pkt)) { + if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) { qemu_net_queue_send(s->incoming_queue, sender, 0, (const uint8_t *)pkt->data, pkt->size, NULL); packet_destroy(pkt, NULL); @@ -203,7 +309,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, } } else { /* NET_FILTER_DIRECTION_RX */ - if (!handle_secondary_tcp_pkt(nf, conn, pkt)) { + if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) { qemu_net_queue_send(s->incoming_queue, sender, 0, (const uint8_t *)pkt->data, pkt->size, NULL); packet_destroy(pkt, NULL); @@ -217,11 +323,49 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, } } +out: packet_destroy(pkt, NULL); pkt = NULL; return 0; } +static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data) +{ + Connection *conn = (Connection *)value; + + conn->offset = 0; +} + +static gboolean offset_is_nonzero(gpointer key, + gpointer value, + gpointer user_data) +{ + Connection *conn = (Connection *)value; + + return conn->offset ? true : false; +} + +static void colo_rewriter_handle_event(NetFilterState *nf, int event, + Error **errp) +{ + RewriterState *rs = FILTER_COLO_REWRITER(nf); + + switch (event) { + case COLO_EVENT_CHECKPOINT: + g_hash_table_foreach(rs->connection_track_table, + reset_seq_offset, NULL); + break; + case COLO_EVENT_FAILOVER: + if (!g_hash_table_find(rs->connection_track_table, + offset_is_nonzero, NULL)) { + filter_rewriter_failover_mode(rs); + } + break; + default: + break; + } +} + static void colo_rewriter_cleanup(NetFilterState *nf) { RewriterState *s = FILTER_COLO_REWRITER(nf); @@ -265,6 +409,7 @@ static void filter_rewriter_init(Object *obj) RewriterState *s = FILTER_COLO_REWRITER(obj); s->vnet_hdr = false; + s->failover_mode = FAILOVER_MODE_OFF; object_property_add_bool(obj, "vnet_hdr_support", filter_rewriter_get_vnet_hdr, filter_rewriter_set_vnet_hdr, NULL); @@ -277,6 +422,7 @@ static void colo_rewriter_class_init(ObjectClass *oc, void *data) nfc->setup = colo_rewriter_setup; nfc->cleanup = colo_rewriter_cleanup; nfc->receive_iov = colo_rewriter_receive_iov; + nfc->handle_event = colo_rewriter_handle_event; } static const TypeInfo colo_rewriter_info = { diff --git a/net/filter.c b/net/filter.c index 2fd7d7d663..c9f9e5fa08 100644 --- a/net/filter.c +++ b/net/filter.c @@ -17,6 +17,8 @@ #include "net/vhost_net.h" #include "qom/object_interfaces.h" #include "qemu/iov.h" +#include "net/colo.h" +#include "migration/colo.h" static inline bool qemu_can_skip_netfilter(NetFilterState *nf) { @@ -245,11 +247,26 @@ static void netfilter_finalize(Object *obj) g_free(nf->netdev_id); } +static void default_handle_event(NetFilterState *nf, int event, Error **errp) +{ + switch (event) { + case COLO_EVENT_CHECKPOINT: + break; + case COLO_EVENT_FAILOVER: + object_property_set_str(OBJECT(nf), "off", "status", errp); + break; + default: + break; + } +} + static void netfilter_class_init(ObjectClass *oc, void *data) { UserCreatableClass *ucc = 
USER_CREATABLE_CLASS(oc); + NetFilterClass *nfc = NETFILTER_CLASS(oc); ucc->complete = netfilter_complete; + nfc->handle_event = default_handle_event; } static const TypeInfo netfilter_info = { @@ -712,10 +712,15 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, void *opaque) { NetClientState *nc = opaque; + size_t size = iov_size(iov, iovcnt); int ret; + if (size > INT_MAX) { + return size; + } + if (nc->link_down) { - return iov_size(iov, iovcnt); + return size; } if (nc->receive_disabled) { @@ -1335,6 +1340,25 @@ void hmp_info_network(Monitor *mon, const QDict *qdict) } } +void colo_notify_filters_event(int event, Error **errp) +{ + NetClientState *nc; + NetFilterState *nf; + NetFilterClass *nfc = NULL; + Error *local_err = NULL; + + QTAILQ_FOREACH(nc, &net_clients, next) { + QTAILQ_FOREACH(nf, &nc->filters, next) { + nfc = NETFILTER_GET_CLASS(OBJECT(nf)); + nfc->handle_event(nf, event, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + } +} + void qmp_set_link(const char *name, bool up, Error **errp) { NetClientState *ncs[MAX_QUEUE_NUM]; diff --git a/qapi/migration.json b/qapi/migration.json index 6e8c21258a..0928f4b727 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -923,18 +923,18 @@ ## # @COLOMode: # -# The colo mode +# The COLO current mode. # -# @unknown: unknown mode +# @none: COLO is disabled. # -# @primary: master side +# @primary: COLO node in primary side. # -# @secondary: slave side +# @secondary: COLO node in slave side. # # Since: 2.8 ## { 'enum': 'COLOMode', - 'data': [ 'unknown', 'primary', 'secondary'] } + 'data': [ 'none', 'primary', 'secondary'] } ## # @FailoverStatus: @@ -957,6 +957,44 @@ 'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] } ## +# @COLO_EXIT: +# +# Emitted when VM finishes COLO mode due to some errors happening or +# at the request of users. +# +# @mode: report COLO mode when COLO exited. +# +# @reason: describes the reason for the COLO exit. +# +# Since: 3.1 +# +# Example: +# +# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172}, +# "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } } +# +## +{ 'event': 'COLO_EXIT', + 'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } } + +## +# @COLOExitReason: +# +# The reason for a COLO exit +# +# @none: no failover has ever happened. This can't occur in the +# COLO_EXIT event, only in the result of query-colo-status. +# +# @request: COLO exit is due to an external request +# +# @error: COLO exit is due to an internal error +# +# Since: 3.1 +## +{ 'enum': 'COLOExitReason', + 'data': [ 'none', 'request', 'error' ] } + +## # @x-colo-lost-heartbeat: # # Tell qemu that heartbeat is lost, request it to do takeover procedures. @@ -1270,6 +1308,38 @@ { 'command': 'xen-colo-do-checkpoint' } ## +# @COLOStatus: +# +# The result format for 'query-colo-status'. +# +# @mode: COLO running mode. If COLO is running, this field will return +# 'primary' or 'secondary'. +# +# @reason: describes the reason for the COLO exit. +# +# Since: 3.0 +## +{ 'struct': 'COLOStatus', + 'data': { 'mode': 'COLOMode', 'reason': 'COLOExitReason' } } + +## +# @query-colo-status: +# +# Query COLO status while the vm is running. +# +# Returns: A @COLOStatus object showing the status. 
+# +# Example: +# +# -> { "execute": "query-colo-status" } +# <- { "return": { "mode": "primary", "active": true, "reason": "request" } } +# +# Since: 3.0 +## +{ 'command': 'query-colo-status', + 'returns': 'COLOStatus' } + +## # @migrate-recover: # # Provide a recovery migration stream URI. diff --git a/qemu-options.hx b/qemu-options.hx index 829ed81e35..214ce396f9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2256,7 +2256,7 @@ qemu-system-i386 linux.img \ -netdev socket,id=n2,mcast=230.0.0.1:1234 # launch yet another QEMU instance on same "bus" qemu-system-i386 linux.img \ - -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \ + -device e1000,netdev=n3,mac=52:54:00:12:34:58 \ -netdev socket,id=n3,mcast=230.0.0.1:1234 @end example @@ -265,7 +265,7 @@ static void cpu_common_reset(CPUState *cpu) cpu->mem_io_pc = 0; cpu->mem_io_vaddr = 0; cpu->icount_extra = 0; - cpu->icount_decr.u32 = 0; + atomic_set(&cpu->icount_decr.u32, 0); cpu->can_do_io = 1; cpu->exception_index = -1; cpu->crash_occurred = false; diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index b08078e7fc..a953897fcc 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -201,7 +201,6 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - tlb_flush(cs); env->lock_addr = -1; #if defined(CONFIG_USER_ONLY) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 7f6ad3000b..61799d20e1 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -30,6 +30,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" #include "fpu/softfloat.h" #include <zlib.h> /* For crc32 */ @@ -509,189 +510,187 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Returns 0 on success; 1 otherwise. */ -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) { - Int128 oldv, cmpv, newv; + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; bool success; - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; - #ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - helper_retaddr = 0; + /* ??? Enforce alignment. 
*/ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } + helper_retaddr = 0; #else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } -#endif + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); } +#endif return !success; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); -} - uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); -} - -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi, - bool parallel, uintptr_t ra) -{ Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); bool success; + int mem_idx; + TCGMemOpIdx oi; - /* high and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); + assert(HAVE_CMPXCHG128); - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - success = int128_eq(oldv, cmpv); -#endif - } else { - uint64_t o0, o1; + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. 
*/ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - helper_retaddr = 0; -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - } + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); return !success; } uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + Int128 newv = int128_make128(new_lo, new_hi); + Int128 oldv; + uintptr_t ra = GETPC(); + uint64_t o0, o1; + bool success; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + + return !success; } uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) + uint64_t new_lo, uint64_t new_hi) { - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); + Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + bool success; + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + + /* + * High and low need to be switched here because this is not actually a + * 128bit store but two doublewords stored consecutively + */ + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + newv = int128_make128(new_hi, new_lo); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + + success = int128_eq(oldv, cmpv); + return !success; } /* Writes back the old data into Rs. 
*/ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs] = int128_getlo(oldv); env->xregs[rs + 1] = int128_gethi(oldv); -#endif } void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_hi, uint64_t new_lo) { - uintptr_t ra = GETPC(); -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else Int128 oldv, cmpv, newv; + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); newv = int128_make128(new_lo, new_hi); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); env->xregs[rs + 1] = int128_getlo(oldv); env->xregs[rs] = int128_gethi(oldv); -#endif } /* diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 8a24278d79..bb9c4d8ac7 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -37,6 +37,7 @@ #include "trace-tcg.h" #include "translate-a64.h" +#include "qemu/atomic128.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; @@ -2086,26 +2087,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, get_mem_index(s), MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (s->be_data == MO_LE) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!HAVE_CMPXCHG128) { + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; + } else if (s->be_data == MO_LE) { gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); - } - } else { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, @@ -2175,14 +2177,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, } tcg_temp_free_i64(cmp); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - TCGv_i32 tcg_rs = tcg_const_i32(rs); - - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + if (HAVE_CMPXCHG128) { + 
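+                /*
+                 * The host provides a 128-bit cmpxchg (HAVE_CMPXCHG128), so
+                 * the parallel helpers, which assert HAVE_CMPXCHG128, may be
+                 * called directly. Otherwise we take the
+                 * gen_helper_exit_atomic() path below: it raises EXCP_ATOMIC
+                 * and the instruction is re-executed serially, with the
+                 * other vCPUs stopped.
+                 */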
TCGv_i32 tcg_rs = tcg_const_i32(rs); + if (s->be_data == MO_LE) { + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + } else { + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + } + tcg_temp_free_i32(tcg_rs); } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(tcg_rs); } else { TCGv_i64 d1 = tcg_temp_new_i64(); TCGv_i64 d2 = tcg_temp_new_i64(); diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c index 30c26b9d9c..6cc53bcb40 100644 --- a/target/i386/mem_helper.c +++ b/target/i386/mem_helper.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #include "tcg.h" void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) @@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, ra); - } else { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else + } else if (HAVE_CMPXCHG128) { int eflags = cpu_cc_compute_all(env, CC_OP); Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); @@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) eflags &= ~CC_Z; } CC_SRC = eflags; -#endif + } else { + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } #endif diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 28af4d191c..e48be4b334 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -195,10 +195,125 @@ struct CPUMIPSState { #define MSAIR_ProcID 8 #define MSAIR_Rev 0 +/* + * Summary of CP0 registers + * ======================== + * + * + * Register 0 Register 1 Register 2 Register 3 + * ---------- ---------- ---------- ---------- + * + * 0 Index Random EntryLo0 EntryLo1 + * 1 MVPControl VPEControl TCStatus GlobalNumber + * 2 MVPConf0 VPEConf0 TCBind + * 3 MVPConf1 VPEConf1 TCRestart + * 4 VPControl YQMask TCHalt + * 5 VPESchedule TCContext + * 6 VPEScheFBack TCSchedule + * 7 VPEOpt TCScheFBack TCOpt + * + * + * Register 4 Register 5 Register 6 Register 7 + * ---------- ---------- ---------- ---------- + * + * 0 Context PageMask Wired HWREna + * 1 ContextConfig PageGrain SRSConf0 + * 2 UserLocal SegCtl0 SRSConf1 + * 3 XContextConfig SegCtl1 SRSConf2 + * 4 DebugContextID SegCtl2 SRSConf3 + * 5 MemoryMapID PWBase SRSConf4 + * 6 PWField PWCtl + * 7 PWSize + * + * + * Register 8 Register 9 Register 10 Register 11 + * ---------- ---------- ----------- ----------- + * + * 0 BadVAddr Count EntryHi Compare + * 1 BadInstr + * 2 BadInstrP + * 3 BadInstrX + * 4 GuestCtl1 GuestCtl0Ext + * 5 GuestCtl2 + * 6 GuestCtl3 + * 7 + * + * + * Register 12 Register 13 Register 14 Register 15 + * ----------- ----------- ----------- ----------- + * + * 0 Status Cause EPC PRId + * 1 IntCtl EBase + * 2 SRSCtl NestedEPC CDMMBase + * 3 SRSMap CMGCRBase + * 4 View_IPL View_RIPL BEVVA + * 5 SRSMap2 NestedExc + * 6 GuestCtl0 + * 7 GTOffset + * + * + * Register 16 Register 17 Register 18 Register 19 + * ----------- ----------- ----------- ----------- + * + * 0 Config LLAddr WatchLo WatchHi + * 1 Config1 MAAR WatchLo WatchHi + * 2 Config2 MAARI WatchLo WatchHi + * 3 Config3 WatchLo WatchHi + * 4 Config4 WatchLo WatchHi + * 5 Config5 WatchLo WatchHi + * 6 WatchLo WatchHi + * 7 WatchLo WatchHi + * + * + * Register 20 Register 21 Register 22 Register 23 + * ----------- ----------- ----------- ----------- + * + * 0 XContext Debug + * 1 TraceControl + * 2 TraceControl2 + * 3 
UserTraceData1 + * 4 TraceIBPC + * 5 TraceDBPC + * 6 Debug2 + * 7 + * + * + * Register 24 Register 25 Register 26 Register 27 + * ----------- ----------- ----------- ----------- + * + * 0 DEPC PerfCnt ErrCtl CacheErr + * 1 PerfCnt + * 2 TraceControl3 PerfCnt + * 3 UserTraceData2 PerfCnt + * 4 PerfCnt + * 5 PerfCnt + * 6 PerfCnt + * 7 PerfCnt + * + * + * Register 28 Register 29 Register 30 Register 31 + * ----------- ----------- ----------- ----------- + * + * 0 DataLo DataHi ErrorEPC DESAVE + * 1 TagLo TagHi + * 2 DataLo DataHi KScratch<n> + * 3 TagLo TagHi KScratch<n> + * 4 DataLo DataHi KScratch<n> + * 5 TagLo TagHi KScratch<n> + * 6 DataLo DataHi KScratch<n> + * 7 TagLo TagHi KScratch<n> + * + */ +/* + * CP0 Register 0 + */ int32_t CP0_Index; /* CP0_MVP* are per MVP registers. */ int32_t CP0_VPControl; #define CP0VPCtl_DIS 0 +/* + * CP0 Register 1 + */ int32_t CP0_Random; int32_t CP0_VPEControl; #define CP0VPECo_YSI 21 @@ -239,7 +354,13 @@ struct CPUMIPSState { #define CP0VPEOpt_DWX2 2 #define CP0VPEOpt_DWX1 1 #define CP0VPEOpt_DWX0 0 +/* + * CP0 Register 2 + */ uint64_t CP0_EntryLo0; +/* + * CP0 Register 3 + */ uint64_t CP0_EntryLo1; #if defined(TARGET_MIPS64) # define CP0EnLo_RI 63 @@ -250,8 +371,14 @@ struct CPUMIPSState { #endif int32_t CP0_GlobalNumber; #define CP0GN_VPId 0 +/* + * CP0 Register 4 + */ target_ulong CP0_Context; target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM]; +/* + * CP0 Register 5 + */ int32_t CP0_PageMask; int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; @@ -289,7 +416,47 @@ struct CPUMIPSState { #define CP0SC2_XR 56 #define CP0SC2_XR_MASK (0xFFULL << CP0SC2_XR) #define CP0SC2_MASK (CP0SC_1GMASK | (CP0SC_1GMASK << 16) | CP0SC2_XR_MASK) + target_ulong CP0_PWBase; + target_ulong CP0_PWField; +#if defined(TARGET_MIPS64) +#define CP0PF_BDI 32 /* 37..32 */ +#define CP0PF_GDI 24 /* 29..24 */ +#define CP0PF_UDI 18 /* 23..18 */ +#define CP0PF_MDI 12 /* 17..12 */ +#define CP0PF_PTI 6 /* 11..6 */ +#define CP0PF_PTEI 0 /* 5..0 */ +#else +#define CP0PF_GDW 24 /* 29..24 */ +#define CP0PF_UDW 18 /* 23..18 */ +#define CP0PF_MDW 12 /* 17..12 */ +#define CP0PF_PTW 6 /* 11..6 */ +#define CP0PF_PTEW 0 /* 5..0 */ +#endif + target_ulong CP0_PWSize; +#if defined(TARGET_MIPS64) +#define CP0PS_BDW 32 /* 37..32 */ +#endif +#define CP0PS_PS 30 +#define CP0PS_GDW 24 /* 29..24 */ +#define CP0PS_UDW 18 /* 23..18 */ +#define CP0PS_MDW 12 /* 17..12 */ +#define CP0PS_PTW 6 /* 11..6 */ +#define CP0PS_PTEW 0 /* 5..0 */ +/* + * CP0 Register 6 + */ int32_t CP0_Wired; + int32_t CP0_PWCtl; +#define CP0PC_PWEN 31 +#if defined(TARGET_MIPS64) +#define CP0PC_PWDIREXT 30 +#define CP0PC_XK 28 +#define CP0PC_XS 27 +#define CP0PC_XU 26 +#endif +#define CP0PC_DPH 7 +#define CP0PC_HUGEPG 6 +#define CP0PC_PSN 0 /* 5..0 */ int32_t CP0_SRSConf0_rw_bitmask; int32_t CP0_SRSConf0; #define CP0SRSC0_M 31 @@ -319,16 +486,34 @@ struct CPUMIPSState { #define CP0SRSC4_SRS15 20 #define CP0SRSC4_SRS14 10 #define CP0SRSC4_SRS13 0 +/* + * CP0 Register 7 + */ int32_t CP0_HWREna; +/* + * CP0 Register 8 + */ target_ulong CP0_BadVAddr; uint32_t CP0_BadInstr; uint32_t CP0_BadInstrP; uint32_t CP0_BadInstrX; +/* + * CP0 Register 9 + */ int32_t CP0_Count; +/* + * CP0 Register 10 + */ target_ulong CP0_EntryHi; #define CP0EnHi_EHINV 10 target_ulong CP0_EntryHi_ASID_mask; +/* + * CP0 Register 11 + */ int32_t CP0_Compare; +/* + * CP0 Register 12 + */ int32_t CP0_Status; #define CP0St_CU3 31 #define CP0St_CU2 30 @@ -370,6 +555,9 @@ struct CPUMIPSState { #define CP0SRSMap_SSV2 8 #define CP0SRSMap_SSV1 4 #define CP0SRSMap_SSV0 0 +/* + * 
CP0 Register 13 + */ int32_t CP0_Cause; #define CP0Ca_BD 31 #define CP0Ca_TI 30 @@ -381,12 +569,21 @@ struct CPUMIPSState { #define CP0Ca_IP 8 #define CP0Ca_IP_mask 0x0000FF00 #define CP0Ca_EC 2 +/* + * CP0 Register 14 + */ target_ulong CP0_EPC; +/* + * CP0 Register 15 + */ int32_t CP0_PRid; target_ulong CP0_EBase; target_ulong CP0_EBaseWG_rw_bitmask; #define CP0EBase_WG 11 target_ulong CP0_CMGCRBase; +/* + * CP0 Register 16 + */ int32_t CP0_Config0; #define CP0C0_M 31 #define CP0C0_K23 28 /* 30..28 */ @@ -503,6 +700,9 @@ struct CPUMIPSState { uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; /* XXX: Maybe make LLAddr per-TC? */ +/* + * CP0 Register 17 + */ uint64_t lladdr; target_ulong llval; target_ulong llnewval; @@ -511,11 +711,23 @@ struct CPUMIPSState { target_ulong llreg; uint64_t CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; +/* + * CP0 Register 18 + */ target_ulong CP0_WatchLo[8]; +/* + * CP0 Register 19 + */ int32_t CP0_WatchHi[8]; #define CP0WH_ASID 16 +/* + * CP0 Register 20 + */ target_ulong CP0_XContext; int32_t CP0_Framemask; +/* + * CP0 Register 23 + */ int32_t CP0_Debug; #define CP0DB_DBD 31 #define CP0DB_DM 30 @@ -535,18 +747,40 @@ struct CPUMIPSState { #define CP0DB_DDBL 2 #define CP0DB_DBp 1 #define CP0DB_DSS 0 +/* + * CP0 Register 24 + */ target_ulong CP0_DEPC; +/* + * CP0 Register 25 + */ int32_t CP0_Performance0; +/* + * CP0 Register 26 + */ int32_t CP0_ErrCtl; #define CP0EC_WST 29 #define CP0EC_SPR 28 #define CP0EC_ITC 26 +/* + * CP0 Register 28 + */ uint64_t CP0_TagLo; int32_t CP0_DataLo; +/* + * CP0 Register 29 + */ int32_t CP0_TagHi; int32_t CP0_DataHi; +/* + * CP0 Register 30 + */ target_ulong CP0_ErrorEPC; +/* + * CP0 Register 31 + */ int32_t CP0_DESAVE; + /* We waste some space so we can handle shadow registers like TCs. */ TCState tcs[MIPS_SHADOW_SET_MAX]; CPUMIPSFPUContext fpus[MIPS_FPU_MAX]; @@ -596,8 +830,9 @@ struct CPUMIPSState { #define MIPS_HFLAG_BX 0x40000 /* branch exchanges execution mode */ #define MIPS_HFLAG_BMASK (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT) /* MIPS DSP resources access. */ -#define MIPS_HFLAG_DSP 0x080000 /* Enable access to MIPS DSP resources. */ -#define MIPS_HFLAG_DSPR2 0x100000 /* Enable access to MIPS DSPR2 resources. */ +#define MIPS_HFLAG_DSP 0x080000 /* Enable access to DSP resources. */ +#define MIPS_HFLAG_DSP_R2 0x100000 /* Enable access to DSP R2 resources. */ +#define MIPS_HFLAG_DSP_R3 0x20000000 /* Enable access to DSP R3 resources. */ /* Extra flag about HWREna register. */ #define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */ #define MIPS_HFLAG_SBRI 0x400000 /* R6 SDBBP causes RI excpt. in user mode */ @@ -614,7 +849,7 @@ struct CPUMIPSState { int CCRes; /* Cycle count resolution/divisor */ uint32_t CP0_Status_rw_bitmask; /* Read/write bits in CP0_Status */ uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */ - int insn_flags; /* Supported instruction set */ + uint64_t insn_flags; /* Supported instruction set */ /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; diff --git a/target/mips/helper.c b/target/mips/helper.c index f0c268b83c..8988452dbd 100644 --- a/target/mips/helper.c +++ b/target/mips/helper.c @@ -537,6 +537,342 @@ hwaddr mips_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } #endif +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + +/* + * Perform hardware page table walk + * + * Memory accesses are performed using the KERNEL privilege level. 
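+ * In this implementation the caller achieves that by clearing the KSU
+ * field of hflags around the walk and restoring it afterwards; a sketch
+ * of what mips_cpu_handle_mmu_fault does below:
+ *
+ *     int mode = env->hflags & MIPS_HFLAG_KSU;
+ *     env->hflags &= ~MIPS_HFLAG_KSU;      // KSU == 0 selects kernel mode
+ *     ret_walker = page_table_walk_refill(env, address, rw, mmu_idx);
+ *     env->hflags |= mode;                 // restore the previous mode
+ *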
+ * Synchronous exceptions detected on memory accesses cause a silent exit
+ * from page table walking, resulting in a TLB or XTLB Refill exception.
+ *
+ * Implementations are not required to support page table walk memory
+ * accesses from mapped memory regions. When an unsupported access is
+ * attempted, a silent exit is taken, resulting in a TLB or XTLB Refill
+ * exception.
+ *
+ * Note that if an exception is caused by the AddressTranslation or
+ * LoadMemory functions, the exception is not taken; instead, a silent
+ * exit is taken, resulting in a TLB or XTLB Refill exception.
+ */
+
+static bool get_pte(CPUMIPSState *env, uint64_t vaddr, int entry_size,
+        uint64_t *pte)
+{
+    if ((vaddr & ((entry_size >> 3) - 1)) != 0) {
+        return false;
+    }
+    if (entry_size == 64) {
+        *pte = cpu_ldq_code(env, vaddr);
+    } else {
+        *pte = cpu_ldl_code(env, vaddr);
+    }
+    return true;
+}
+
+static uint64_t get_tlb_entry_layout(CPUMIPSState *env, uint64_t entry,
+        int entry_size, int ptei)
+{
+    uint64_t result = entry;
+    uint64_t rixi;
+    if (ptei > entry_size) {
+        ptei -= 32;
+    }
+    result >>= (ptei - 2);
+    rixi = result & 3;
+    result >>= 2;
+    result |= rixi << CP0EnLo_XI;
+    return result;
+}
+
+static int walk_directory(CPUMIPSState *env, uint64_t *vaddr,
+        int directory_index, bool *huge_page, bool *hgpg_directory_hit,
+        uint64_t *pw_entrylo0, uint64_t *pw_entrylo1)
+{
+    int dph = (env->CP0_PWCtl >> CP0PC_DPH) & 0x1;
+    int psn = (env->CP0_PWCtl >> CP0PC_PSN) & 0x3F;
+    int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1;
+    int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F;
+    int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F;
+    int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3;
+    int directory_shift = (ptew > 1) ? -1 :
+            (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift;
+    int leaf_shift = (ptew > 1) ? -1 :
+            (ptew == 1) ?
native_shift + 1 : native_shift; + uint32_t direntry_size = 1 << (directory_shift + 3); + uint32_t leafentry_size = 1 << (leaf_shift + 3); + uint64_t entry; + uint64_t paddr; + int prot; + uint64_t lsb = 0; + uint64_t w = 0; + + if (get_physical_address(env, &paddr, &prot, *vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + /* wrong base address */ + return 0; + } + if (!get_pte(env, *vaddr, direntry_size, &entry)) { + return 0; + } + + if ((entry & (1 << psn)) && hugepg) { + *huge_page = true; + *hgpg_directory_hit = true; + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + w = directory_index - 1; + if (directory_index & 0x1) { + /* Generate adjacent page from same PTE for odd TLB page */ + lsb = (1 << w) >> 6; + *pw_entrylo0 = entry & ~lsb; /* even page */ + *pw_entrylo1 = entry | lsb; /* odd page */ + } else if (dph) { + int oddpagebit = 1 << leaf_shift; + uint64_t vaddr2 = *vaddr ^ oddpagebit; + if (*vaddr & oddpagebit) { + *pw_entrylo1 = entry; + } else { + *pw_entrylo0 = entry; + } + if (get_physical_address(env, &paddr, &prot, vaddr2, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return 0; + } + if (!get_pte(env, vaddr2, leafentry_size, &entry)) { + return 0; + } + entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew); + if (*vaddr & oddpagebit) { + *pw_entrylo0 = entry; + } else { + *pw_entrylo1 = entry; + } + } else { + return 0; + } + return 1; + } else { + *vaddr = entry; + return 2; + } +} + +static bool page_table_walk_refill(CPUMIPSState *env, vaddr address, int rw, + int mmu_idx) +{ + int gdw = (env->CP0_PWSize >> CP0PS_GDW) & 0x3F; + int udw = (env->CP0_PWSize >> CP0PS_UDW) & 0x3F; + int mdw = (env->CP0_PWSize >> CP0PS_MDW) & 0x3F; + int ptw = (env->CP0_PWSize >> CP0PS_PTW) & 0x3F; + int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F; + + /* Initial values */ + bool huge_page = false; + bool hgpg_bdhit = false; + bool hgpg_gdhit = false; + bool hgpg_udhit = false; + bool hgpg_mdhit = false; + + int32_t pw_pagemask = 0; + target_ulong pw_entryhi = 0; + uint64_t pw_entrylo0 = 0; + uint64_t pw_entrylo1 = 0; + + /* Native pointer size */ + /*For the 32-bit architectures, this bit is fixed to 0.*/ + int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3; + + /* Indices from PWField */ + int pf_gdw = (env->CP0_PWField >> CP0PF_GDW) & 0x3F; + int pf_udw = (env->CP0_PWField >> CP0PF_UDW) & 0x3F; + int pf_mdw = (env->CP0_PWField >> CP0PF_MDW) & 0x3F; + int pf_ptw = (env->CP0_PWField >> CP0PF_PTW) & 0x3F; + int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F; + + /* Indices computed from faulting address */ + int gindex = (address >> pf_gdw) & ((1 << gdw) - 1); + int uindex = (address >> pf_udw) & ((1 << udw) - 1); + int mindex = (address >> pf_mdw) & ((1 << mdw) - 1); + int ptindex = (address >> pf_ptw) & ((1 << ptw) - 1); + + /* Other HTW configs */ + int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1; + + /* HTW Shift values (depend on entry size) */ + int directory_shift = (ptew > 1) ? -1 : + (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift; + int leaf_shift = (ptew > 1) ? -1 : + (ptew == 1) ? 
native_shift + 1 : native_shift; + + /* Offsets into tables */ + int goffset = gindex << directory_shift; + int uoffset = uindex << directory_shift; + int moffset = mindex << directory_shift; + int ptoffset0 = (ptindex >> 1) << (leaf_shift + 1); + int ptoffset1 = ptoffset0 | (1 << (leaf_shift)); + + uint32_t leafentry_size = 1 << (leaf_shift + 3); + + /* Starting address - Page Table Base */ + uint64_t vaddr = env->CP0_PWBase; + + uint64_t dir_entry; + uint64_t paddr; + int prot; + int m; + + if (!(env->CP0_Config3 & (1 << CP0C3_PW))) { + /* walker is unimplemented */ + return false; + } + if (!(env->CP0_PWCtl & (1 << CP0PC_PWEN))) { + /* walker is disabled */ + return false; + } + if (!(gdw > 0 || udw > 0 || mdw > 0)) { + /* no structure to walk */ + return false; + } + if ((directory_shift == -1) || (leaf_shift == -1)) { + return false; + } + + /* Global Directory */ + if (gdw > 0) { + vaddr |= goffset; + switch (walk_directory(env, &vaddr, pf_gdw, &huge_page, &hgpg_gdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Upper directory */ + if (udw > 0) { + vaddr |= uoffset; + switch (walk_directory(env, &vaddr, pf_udw, &huge_page, &hgpg_udhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Middle directory */ + if (mdw > 0) { + vaddr |= moffset; + switch (walk_directory(env, &vaddr, pf_mdw, &huge_page, &hgpg_mdhit, + &pw_entrylo0, &pw_entrylo1)) + { + case 0: + return false; + case 1: + goto refill; + case 2: + default: + break; + } + } + + /* Leaf Level Page Table - First half of PTE pair */ + vaddr |= ptoffset0; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo0 = dir_entry; + + /* Leaf Level Page Table - Second half of PTE pair */ + vaddr |= ptoffset1; + if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD, + ACCESS_INT, cpu_mmu_index(env, false)) != + TLBRET_MATCH) { + return false; + } + if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) { + return false; + } + dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew); + pw_entrylo1 = dir_entry; + +refill: + + m = (1 << pf_ptw) - 1; + + if (huge_page) { + switch (hgpg_bdhit << 3 | hgpg_gdhit << 2 | hgpg_udhit << 1 | + hgpg_mdhit) + { + case 4: + m = (1 << pf_gdw) - 1; + if (pf_gdw & 1) { + m >>= 1; + } + break; + case 2: + m = (1 << pf_udw) - 1; + if (pf_udw & 1) { + m >>= 1; + } + break; + case 1: + m = (1 << pf_mdw) - 1; + if (pf_mdw & 1) { + m >>= 1; + } + break; + } + } + pw_pagemask = m >> 12; + update_pagemask(env, pw_pagemask << 13, &pw_pagemask); + pw_entryhi = (address & ~0x1fff) | (env->CP0_EntryHi & 0xFF); + { + target_ulong tmp_entryhi = env->CP0_EntryHi; + int32_t tmp_pagemask = env->CP0_PageMask; + uint64_t tmp_entrylo0 = env->CP0_EntryLo0; + uint64_t tmp_entrylo1 = env->CP0_EntryLo1; + + env->CP0_EntryHi = pw_entryhi; + env->CP0_PageMask = pw_pagemask; + env->CP0_EntryLo0 = pw_entrylo0; + env->CP0_EntryLo1 = pw_entrylo1; + + /* + * The hardware page walker inserts a page into the TLB in a manner + * identical to a TLBWR instruction as executed by the software refill + * handler. 
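+     * A software refill handler reaches the same state with roughly the
+     * following sequence (illustrative only; the PTE pair offsets depend
+     * on the entry size configured in PWSize):
+     *
+     *     mfc0    k1, CP0_Context
+     *     lw      k0, 0(k1)        # even PTE of the pair
+     *     lw      k1, 4(k1)        # odd PTE of the pair
+     *     mtc0    k0, CP0_EntryLo0
+     *     mtc0    k1, CP0_EntryLo1
+     *     ehb
+     *     tlbwr
+     *     eret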
+ */ + r4k_helper_tlbwr(env); + + env->CP0_EntryHi = tmp_entryhi; + env->CP0_PageMask = tmp_pagemask; + env->CP0_EntryLo0 = tmp_entrylo0; + env->CP0_EntryLo1 = tmp_entrylo1; + } + return true; +} +#endif +#endif + int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, int mmu_idx) { @@ -558,8 +894,7 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, /* data access */ #if !defined(CONFIG_USER_ONLY) - /* XXX: put correct access by using cpu_restore_state() - correctly */ + /* XXX: put correct access by using cpu_restore_state() correctly */ access_type = ACCESS_INT; ret = get_physical_address(env, &physical, &prot, address, rw, access_type, mmu_idx); @@ -583,6 +918,32 @@ int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw, } else if (ret < 0) #endif { +#if !defined(CONFIG_USER_ONLY) +#if !defined(TARGET_MIPS64) + if ((ret == TLBRET_NOMATCH) && (env->tlb->nb_tlb > 1)) { + /* + * Memory reads during hardware page table walking are performed + * as if they were kernel-mode load instructions. + */ + int mode = (env->hflags & MIPS_HFLAG_KSU); + bool ret_walker; + env->hflags &= ~MIPS_HFLAG_KSU; + ret_walker = page_table_walk_refill(env, address, rw, mmu_idx); + env->hflags |= mode; + if (ret_walker) { + ret = get_physical_address(env, &physical, &prot, + address, rw, access_type, mmu_idx); + if (ret == TLBRET_MATCH) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot | PAGE_EXEC, + mmu_idx, TARGET_PAGE_SIZE); + ret = 0; + return ret; + } + } + } +#endif +#endif raise_mmu_exception(env, address, rw, ret); ret = 1; } diff --git a/target/mips/helper.h b/target/mips/helper.h index b2a780a6f2..c23e4e5d97 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -120,6 +120,8 @@ DEF_HELPER_2(mtc0_pagegrain, void, env, tl) DEF_HELPER_2(mtc0_segctl0, void, env, tl) DEF_HELPER_2(mtc0_segctl1, void, env, tl) DEF_HELPER_2(mtc0_segctl2, void, env, tl) +DEF_HELPER_2(mtc0_pwfield, void, env, tl) +DEF_HELPER_2(mtc0_pwsize, void, env, tl) DEF_HELPER_2(mtc0_wired, void, env, tl) DEF_HELPER_2(mtc0_srsconf0, void, env, tl) DEF_HELPER_2(mtc0_srsconf1, void, env, tl) @@ -127,6 +129,7 @@ DEF_HELPER_2(mtc0_srsconf2, void, env, tl) DEF_HELPER_2(mtc0_srsconf3, void, env, tl) DEF_HELPER_2(mtc0_srsconf4, void, env, tl) DEF_HELPER_2(mtc0_hwrena, void, env, tl) +DEF_HELPER_2(mtc0_pwctl, void, env, tl) DEF_HELPER_2(mtc0_count, void, env, tl) DEF_HELPER_2(mtc0_entryhi, void, env, tl) DEF_HELPER_2(mttc0_entryhi, void, env, tl) diff --git a/target/mips/internal.h b/target/mips/internal.h index e41051f8e6..8b1b2456af 100644 --- a/target/mips/internal.h +++ b/target/mips/internal.h @@ -59,7 +59,7 @@ struct mips_def_t { int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; target_ulong CP0_EBaseWG_rw_bitmask; - int insn_flags; + uint64_t insn_flags; enum mips_mmu_types mmu_type; }; @@ -211,6 +211,7 @@ uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; int ieee_ex_to_mips(int xcpt); +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) { @@ -306,9 +307,9 @@ static inline void compute_hflags(CPUMIPSState *env) { env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 | MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU | - MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 | - MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | MIPS_HFLAG_FRE | - MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); + MIPS_HFLAG_AWRAP | 
MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3 | MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | + MIPS_HFLAG_FRE | MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL); if (env->CP0_Status & (1 << CP0St_ERL)) { env->hflags |= MIPS_HFLAG_ERL; } @@ -355,16 +356,29 @@ static inline void compute_hflags(CPUMIPSState *env) (env->CP0_Config5 & (1 << CP0C5_SBRI))) { env->hflags |= MIPS_HFLAG_SBRI; } - if (env->insn_flags & ASE_DSPR2) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASER2, - so enable to access DSPR2 resources. */ + if (env->insn_flags & ASE_DSP_R3) { + /* + * Our cpu supports DSP R3 ASE, so enable + * access to DSP R3 resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { - env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2; + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 | + MIPS_HFLAG_DSP_R3; + } + } else if (env->insn_flags & ASE_DSP_R2) { + /* + * Our cpu supports DSP R2 ASE, so enable + * access to DSP R2 resources. + */ + if (env->CP0_Status & (1 << CP0St_MX)) { + env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2; } } else if (env->insn_flags & ASE_DSP) { - /* Enables access MIPS DSP resources, now our cpu is DSP ASE, - so enable to access DSP resources. */ + /* + * Our cpu supports DSP ASE, so enable + * access to DSP resources. + */ if (env->CP0_Status & (1 << CP0St_MX)) { env->hflags |= MIPS_HFLAG_DSP; } diff --git a/target/mips/machine.c b/target/mips/machine.c index 5ba78acd6d..70a8909b90 100644 --- a/target/mips/machine.c +++ b/target/mips/machine.c @@ -212,8 +212,8 @@ const VMStateDescription vmstate_tlb = { const VMStateDescription vmstate_mips_cpu = { .name = "cpu", - .version_id = 11, - .minimum_version_id = 11, + .version_id = 15, + .minimum_version_id = 15, .post_load = cpu_post_load, .fields = (VMStateField[]) { /* Active TC */ @@ -256,7 +256,11 @@ const VMStateDescription vmstate_mips_cpu = { VMSTATE_UINTTL(env.CP0_SegCtl0, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl1, MIPSCPU), VMSTATE_UINTTL(env.CP0_SegCtl2, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWBase, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWField, MIPSCPU), + VMSTATE_UINTTL(env.CP0_PWSize, MIPSCPU), VMSTATE_INT32(env.CP0_Wired, MIPSCPU), + VMSTATE_INT32(env.CP0_PWCtl, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf0, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf1, MIPSCPU), VMSTATE_INT32(env.CP0_SRSConf2, MIPSCPU), diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h index c8e99791ad..71ea4ef892 100644 --- a/target/mips/mips-defs.h +++ b/target/mips/mips-defs.h @@ -22,40 +22,51 @@ #endif #endif -/* Masks used to mark instructions to indicate which ISA level they - were introduced in. */ -#define ISA_MIPS1 0x00000001 -#define ISA_MIPS2 0x00000002 -#define ISA_MIPS3 0x00000004 -#define ISA_MIPS4 0x00000008 -#define ISA_MIPS5 0x00000010 -#define ISA_MIPS32 0x00000020 -#define ISA_MIPS32R2 0x00000040 -#define ISA_MIPS64 0x00000080 -#define ISA_MIPS64R2 0x00000100 -#define ISA_MIPS32R3 0x00000200 -#define ISA_MIPS64R3 0x00000400 -#define ISA_MIPS32R5 0x00000800 -#define ISA_MIPS64R5 0x00001000 -#define ISA_MIPS32R6 0x00002000 -#define ISA_MIPS64R6 0x00004000 -#define ISA_NANOMIPS32 0x00008000 - -/* MIPS ASEs. */ -#define ASE_MIPS16 0x00010000 -#define ASE_MIPS3D 0x00020000 -#define ASE_MDMX 0x00040000 -#define ASE_DSP 0x00080000 -#define ASE_DSPR2 0x00100000 -#define ASE_MT 0x00200000 -#define ASE_SMARTMIPS 0x00400000 -#define ASE_MICROMIPS 0x00800000 -#define ASE_MSA 0x01000000 - -/* Chip specific instructions. 
*/ -#define INSN_LOONGSON2E 0x20000000 -#define INSN_LOONGSON2F 0x40000000 -#define INSN_VR54XX 0x80000000 +/* + * bit definitions for insn_flags (ISAs/ASEs flags) + * ------------------------------------------------ + */ +/* + * bits 0-31: MIPS base instruction sets + */ +#define ISA_MIPS1 0x0000000000000001ULL +#define ISA_MIPS2 0x0000000000000002ULL +#define ISA_MIPS3 0x0000000000000004ULL +#define ISA_MIPS4 0x0000000000000008ULL +#define ISA_MIPS5 0x0000000000000010ULL +#define ISA_MIPS32 0x0000000000000020ULL +#define ISA_MIPS32R2 0x0000000000000040ULL +#define ISA_MIPS64 0x0000000000000080ULL +#define ISA_MIPS64R2 0x0000000000000100ULL +#define ISA_MIPS32R3 0x0000000000000200ULL +#define ISA_MIPS64R3 0x0000000000000400ULL +#define ISA_MIPS32R5 0x0000000000000800ULL +#define ISA_MIPS64R5 0x0000000000001000ULL +#define ISA_MIPS32R6 0x0000000000002000ULL +#define ISA_MIPS64R6 0x0000000000004000ULL +#define ISA_NANOMIPS32 0x0000000000008000ULL +/* + * bits 32-47: MIPS ASEs + */ +#define ASE_MIPS16 0x0000000100000000ULL +#define ASE_MIPS3D 0x0000000200000000ULL +#define ASE_MDMX 0x0000000400000000ULL +#define ASE_DSP 0x0000000800000000ULL +#define ASE_DSP_R2 0x0000001000000000ULL +#define ASE_DSP_R3 0x0000002000000000ULL +#define ASE_MT 0x0000004000000000ULL +#define ASE_SMARTMIPS 0x0000008000000000ULL +#define ASE_MICROMIPS 0x0000010000000000ULL +#define ASE_MSA 0x0000020000000000ULL +/* + * bits 48-55: vendor-specific base instruction sets + */ +#define INSN_LOONGSON2E 0x0001000000000000ULL +#define INSN_LOONGSON2F 0x0002000000000000ULL +#define INSN_VR54XX 0x0004000000000000ULL +/* + * bits 56-63: vendor-specific ASEs + */ /* MIPS CPU defines. */ #define CPU_MIPS1 (ISA_MIPS1) diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c index c148b310cd..d1f1d1aa35 100644 --- a/target/mips/op_helper.c +++ b/target/mips/op_helper.c @@ -1400,7 +1400,7 @@ void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1) env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF); } -void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || @@ -1411,6 +1411,11 @@ void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) } } +void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1) +{ + update_pagemask(env, arg1, &env->CP0_PageMask); +} + void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1) { /* SmartMIPS not implemented */ @@ -1445,6 +1450,77 @@ void helper_mtc0_segctl2(CPUMIPSState *env, target_ulong arg1) tlb_flush(cs); } +void helper_mtc0_pwfield(CPUMIPSState *env, target_ulong arg1) +{ +#if defined(TARGET_MIPS64) + uint64_t mask = 0x3F3FFFFFFFULL; + uint32_t old_ptei = (env->CP0_PWField >> CP0PF_PTEI) & 0x3FULL; + uint32_t new_ptei = (arg1 >> CP0PF_PTEI) & 0x3FULL; + + if ((env->insn_flags & ISA_MIPS32R6)) { + if (((arg1 >> CP0PF_BDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_BDI); + } + if (((arg1 >> CP0PF_GDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_GDI); + } + if (((arg1 >> CP0PF_UDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_UDI); + } + if (((arg1 >> CP0PF_MDI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_MDI); + } + if (((arg1 >> CP0PF_PTI) & 0x3FULL) < 12) { + mask &= ~(0x3FULL << CP0PF_PTI); + } + } + env->CP0_PWField = arg1 & mask; + + if ((new_ptei >= 32) || + ((env->insn_flags & ISA_MIPS32R6) && + (new_ptei == 0 || new_ptei == 
1))) {
+        env->CP0_PWField = (env->CP0_PWField & ~0x3FULL) |
+                (old_ptei << CP0PF_PTEI);
+    }
+#else
+    uint32_t mask = 0x3FFFFFFF;
+    uint32_t old_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F;
+    uint32_t new_ptew = (arg1 >> CP0PF_PTEW) & 0x3F;
+
+    if ((env->insn_flags & ISA_MIPS32R6)) {
+        if (((arg1 >> CP0PF_GDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_GDW);
+        }
+        if (((arg1 >> CP0PF_UDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_UDW);
+        }
+        if (((arg1 >> CP0PF_MDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_MDW);
+        }
+        if (((arg1 >> CP0PF_PTW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_PTW);
+        }
+    }
+    env->CP0_PWField = arg1 & mask;
+
+    if ((new_ptew >= 32) ||
+        ((env->insn_flags & ISA_MIPS32R6) &&
+         (new_ptew == 0 || new_ptew == 1))) {
+        env->CP0_PWField = (env->CP0_PWField & ~0x3F) |
+                (old_ptew << CP0PF_PTEW);
+    }
+#endif
+}
+
+void helper_mtc0_pwsize(CPUMIPSState *env, target_ulong arg1)
+{
+#if defined(TARGET_MIPS64)
+    env->CP0_PWSize = arg1 & 0x3F7FFFFFFFULL;
+#else
+    env->CP0_PWSize = arg1 & 0x3FFFFFFF;
+#endif
+}
+
 void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
 {
     if (env->insn_flags & ISA_MIPS32R6) {
@@ -1456,6 +1532,16 @@
     }
 }
 
+void helper_mtc0_pwctl(CPUMIPSState *env, target_ulong arg1)
+{
+#if defined(TARGET_MIPS64)
+    /* PWEn = 0. Hardware page table walking is not implemented. */
+    env->CP0_PWCtl = (env->CP0_PWCtl & 0x000000C0) | (arg1 & 0x5C00003F);
+#else
+    env->CP0_PWCtl = (arg1 & 0x800000FF);
+#endif
+}
+
 void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask;
diff --git a/target/mips/translate.c b/target/mips/translate.c
index ab16cdb911..3a0bdd55c8 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -1389,6 +1389,545 @@ enum {
     OPC_BINSRI_df   = (0x7 << 23) | OPC_MSA_BIT_09,
 };
 
+
+/*
+ * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
+ * ============================================
+ *
+ * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of the
+ * MIPS32 instruction set. It is designed to fit the needs of signal,
+ * graphical and video processing applications. The MXU instruction set is
+ * used in the XBurst family of microprocessors by Ingenic.
+ *
+ * The MXU unit contains 17 registers called X0-X16. X0 is always zero, and
+ * X16 is the control register.
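+ *
+ * For example, data moves from GPRs into MXU registers with S32I2M and
+ * back with S32M2I, so a minimal MXU sequence (illustrative only, GPR
+ * names arbitrary, written in the notation described below) might read:
+ *
+ *     S32I2M  XR1, $t0         # XR1 <- t0
+ *     S32I2M  XR2, $t1         # XR2 <- t1
+ *     S32OR   XR3, XR1, XR2    # XR3 <- XR1 | XR2
+ *     S32M2I  XR3, $t2         # t2 <- XR3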
+ * + * The notation used in MXU assembler mnemonics: + * + * XRa, XRb, XRc, XRd - MXU registers + * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers + * s12 - a subfield of an instruction code + * strd2 - a subfield of an instruction code + * eptn2 - a subfield of an instruction code + * eptn3 - a subfield of an instruction code + * optn2 - a subfield of an instruction code + * optn3 - a subfield of an instruction code + * sft4 - a subfield of an instruction code + * + * Load/Store instructions Multiplication instructions + * ----------------------- --------------------------- + * + * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt + * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt + * S32LDDV XRa, Rb, rc, strd2 S32SUB XRa, XRd, Rs, Rt + * S32STDV XRa, Rb, rc, strd2 S32SUBU XRa, XRd, Rs, Rt + * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt + * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt + * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 + * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 + * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 + * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 + * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 + * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 + * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 + * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd + * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd + * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 + * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 + * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 + * S16SDI XRa, Rb, s10, eptn2 + * S8LDD XRa, Rb, s8, eptn3 + * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions + * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- + * S8SDI XRa, Rb, s8, eptn3 + * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 + * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd + * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 + * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 + * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 + * S32CPS XRa, XRb, XRc + * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 + * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 + * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 + * D16ASUM XRa, XRb, XRc, XRd, eptn2 + * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, + * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc + * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc + * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 + * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 + * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 + * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc + * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd + * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc + * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc + * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd + * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd + * Q8SLT XRa, XRb, XRc + * Q8SLTU XRa, XRb, XRc + * Q8MOVZ XRa, XRb, XRc Shift instructions + * Q8MOVN XRa, XRb, XRc ------------------ + * + * D32SLL XRa, XRb, XRc, XRd, sft4 + * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 + * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 + * D32SARL XRa, XRb, XRc, sft4 + * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb + * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb + * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb + * S32OR XRa, XRb, XRc 
D32SARW XRa, XRb, XRc, Rb
+ *                                      Q16SLL XRa, XRb, XRc, XRd, sft4
+ *                                      Q16SLR XRa, XRb, XRc, XRd, sft4
+ *  Miscellaneous instructions          Q16SAR XRa, XRb, XRc, XRd, sft4
+ *  --------------------------          Q16SLLV XRa, XRb, Rb
+ *                                      Q16SLRV XRa, XRb, Rb
+ *  S32SFL XRa, XRb, XRc, XRd, optn2    Q16SARV XRa, XRb, Rb
+ *  S32ALN XRa, XRb, XRc, Rb
+ *  S32ALNI XRa, XRb, XRc, s3
+ *  S32LUI XRa, s8, optn3               Move instructions
+ *  S32EXTR XRa, XRb, Rb, bits5         -----------------
+ *  S32EXTRV XRa, XRb, Rs, Rt
+ *  Q16SCOP XRa, XRb, XRc, XRd          S32M2I XRa, Rb
+ *  Q16SAT XRa, XRb, XRc                S32I2M XRa, Rb
+ *
+ *
+ *              bits
+ *             05..00
+ *
+ *          ┌─ 000000 ─ OPC_MXU_S32MADD
+ *          ├─ 000001 ─ OPC_MXU_S32MADDU
+ *          ├─ 000010 ─ <not assigned>
+ *          │                               20..18
+ *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
+ *          │                            ├─ 001 ─ OPC_MXU_S32MIN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MAX
+ *          │                            ├─ 011 ─ OPC_MXU_D16MIN
+ *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
+ *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
+ *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
+ *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
+ *          ├─ 000100 ─ OPC_MXU_S32MSUB
+ *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
+ *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
+ *          │                            ├─ 001 ─ OPC_MXU_D16SLT
+ *          │                            ├─ 010 ─ OPC_MXU_D16AVG
+ *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
+ *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
+ *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
+ *          │                            └─ 111 ─ OPC_MXU_Q8ADD
+ *          │
+ *          │                               20..18
+ *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
+ *          │                            ├─ 010 ─ OPC_MXU_D16CPS
+ *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
+ *          │                            └─ 110 ─ OPC_MXU_Q16SAT
+ *          ├─ 001000 ─ OPC_MXU_D16MUL
+ *          │                               25..24
+ *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
+ *          │                            └─ 01 ─ OPC_MXU_D16MULE
+ *          ├─ 001010 ─ OPC_MXU_D16MAC
+ *          ├─ 001011 ─ OPC_MXU_D16MACF
+ *          ├─ 001100 ─ OPC_MXU_D16MADL
+ *          │                               25..24
+ *          ├─ 001101 ─ OPC_MXU__POOL04 ─┬─ 00 ─ OPC_MXU_S16MAD
+ *          │                            └─ 01 ─ OPC_MXU_S16MAD_1
+ *          ├─ 001110 ─ OPC_MXU_Q16ADD
+ *          ├─ 001111 ─ OPC_MXU_D16MACE
+ *          │                               23
+ *          ├─ 010000 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32LDD
+ *          │                            └─ 1 ─ OPC_MXU_S32LDDR
+ *          │
+ *          │                               23
+ *          ├─ 010001 ─ OPC_MXU__POOL06 ─┬─ 0 ─ OPC_MXU_S32STD
+ *          │                            └─ 1 ─ OPC_MXU_S32STDR
+ *          │
+ *          │                               13..10
+ *          ├─ 010010 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32LDDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010011 ─ OPC_MXU__POOL08 ─┬─ 0000 ─ OPC_MXU_S32STDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
+ *          │
+ *          │                               23
+ *          ├─ 010100 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32LDI
+ *          │                            └─ 1 ─ OPC_MXU_S32LDIR
+ *          │
+ *          │                               23
+ *          ├─ 010101 ─ OPC_MXU__POOL10 ─┬─ 0 ─ OPC_MXU_S32SDI
+ *          │                            └─ 1 ─ OPC_MXU_S32SDIR
+ *          │
+ *          │                               13..10
+ *          ├─ 010110 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32LDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010111 ─ OPC_MXU__POOL12 ─┬─ 0000 ─ OPC_MXU_S32SDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
+ *          ├─ 011000 ─ OPC_MXU_D32ADD
+ *          │                               23..22
+ *   MXU    ├─ 011001 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_D32ACC
+ * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
+ *          │                            └─ 10 ─ OPC_MXU_D32ASUM
+ *          ├─ 011010 ─ <not assigned>
+ *          │                               23..22
+ *          ├─ 011011 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q16ACC
+ *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
+ *          │                            └─ 10 ─ OPC_MXU_Q16ASUM
+ *          │
+ *          │                               23..22
+ *          ├─ 011100 ─ OPC_MXU__POOL15 ─┬─ 00 ─ OPC_MXU_Q8ADDE
+ *          │                            ├─ 01 ─ OPC_MXU_D8SUM
+ *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
+ *          ├─ 011110 ─ <not assigned>
+ *          ├─ 011111 ─ <not assigned>
+ *          ├─ 100000 ─ <not assigned>
+ *          ├─ 100001 ─ <not assigned>
+ *          ├─ 100010 ─ OPC_MXU_S8LDD
+ *          ├─ 100011 ─ OPC_MXU_S8STD
+ *          ├─ 100100 ─ OPC_MXU_S8LDI
+ *          ├─ 100101 ─ OPC_MXU_S8SDI
+ *          │                               15..14
+ *          ├─ 100110 ─ OPC_MXU__POOL16 ─┬─ 00 ─ OPC_MXU_S32MUL
+ *          │                            ├─ 01 ─ OPC_MXU_S32MULU
+ *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
+ *          │                            └─ 11 ─ OPC_MXU_S32EXTRV
+ *          │
+ *          │                               20..18
+ *          ├─ 100111 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_D32SARW
+ *          │                            ├─ 001 ─ OPC_MXU_S32ALN
+ *          ├─ 101000 ─ OPC_MXU_LXB      ├─ 010 ─ OPC_MXU_S32ALNI
+ *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_S32NOR
+ *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_S32AND
+ *          ├─ 101011 ─ OPC_MXU_S16STD   ├─ 101 ─ OPC_MXU_S32OR
+ *          ├─ 101100 ─ OPC_MXU_S16LDI   ├─ 110 ─ OPC_MXU_S32XOR
+ *          ├─ 101101 ─ OPC_MXU_S16SDI   └─ 111 ─ OPC_MXU_S32LUI
+ *          ├─ 101110 ─ OPC_MXU_S32M2I
+ *          ├─ 101111 ─ OPC_MXU_S32I2M
+ *          ├─ 110000 ─ OPC_MXU_D32SLL
+ *          ├─ 110001 ─ OPC_MXU_D32SLR
+ *          ├─ 110010 ─ OPC_MXU_D32SARL
+ *          ├─ 110011 ─ OPC_MXU_D32SAR
+ *          ├─ 110100 ─ OPC_MXU_Q16SLL
+ *          ├─ 110101 ─ OPC_MXU_Q16SLR      20..18
+ *          ├─ 110110 ─ OPC_MXU__POOL18 ─┬─ 000 ─ OPC_MXU_D32SLLV
+ *          │                            ├─ 001 ─ OPC_MXU_D32SLRV
+ *          │                            ├─ 011 ─ OPC_MXU_D32SARV
+ *          │                            ├─ 100 ─ OPC_MXU_Q16SLLV
+ *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
+ *          │                            └─ 111 ─ OPC_MXU_Q16SARV
+ *          ├─ 110111 ─ OPC_MXU_Q16SAR
+ *          │                               23..22
+ *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
+ *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
+ *          │
+ *          │                               20..18
+ *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
+ *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
+ *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
+ *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
+ *          │                            └─ 101 ─ OPC_MXU_S32MOVN
+ *          │
+ *          │                               23..22
+ *          ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
+ *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
+ *          ├─ 111011 ─ OPC_MXU_Q16SCOP
+ *          ├─ 111100 ─ OPC_MXU_Q8MADL
+ *          ├─ 111101 ─ OPC_MXU_S32SFL
+ *          ├─ 111110 ─ OPC_MXU_Q8SAD
+ *          └─ 111111 ─ <not assigned>
+ *
+ *
+ * Compiled after:
+ *
+ * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
+ * Programming Manual", Ingenic Semiconductor Co., Ltd., 2017
+ */
+
+enum {
+    OPC_MXU_S32MADD  = 0x00,
+    OPC_MXU_S32MADDU = 0x01,
+    /* not assigned 0x02 */
+    OPC_MXU__POOL00  = 0x03,
+    OPC_MXU_S32MSUB  = 0x04,
+    OPC_MXU_S32MSUBU = 0x05,
+    OPC_MXU__POOL01  = 0x06,
+    OPC_MXU__POOL02  = 0x07,
+    OPC_MXU_D16MUL   = 0x08,
+    OPC_MXU__POOL03  = 0x09,
+    OPC_MXU_D16MAC   = 0x0A,
+    OPC_MXU_D16MACF  = 0x0B,
+    OPC_MXU_D16MADL  = 0x0C,
+    OPC_MXU__POOL04  = 0x0D,
+    OPC_MXU_Q16ADD   = 0x0E,
+    OPC_MXU_D16MACE  = 0x0F,
+    OPC_MXU__POOL05  = 0x10,
+    OPC_MXU__POOL06  = 0x11,
+    OPC_MXU__POOL07  = 0x12,
+    OPC_MXU__POOL08  = 0x13,
+    OPC_MXU__POOL09  = 0x14,
+    OPC_MXU__POOL10  = 0x15,
+    OPC_MXU__POOL11  = 0x16,
+    OPC_MXU__POOL12  = 0x17,
+    OPC_MXU_D32ADD   = 0x18,
+    OPC_MXU__POOL13  = 0x19,
+    /* not assigned 0x1A */
+    OPC_MXU__POOL14  = 0x1B,
+    OPC_MXU__POOL15  = 0x1C,
+    OPC_MXU_Q8ACCE   = 0x1D,
+    /* not assigned 0x1E */
+    /* not assigned 0x1F */
+    /* not assigned 0x20 */
+    /* not assigned 0x21 */
+    OPC_MXU_S8LDD    = 0x22,
+    OPC_MXU_S8STD    = 0x23,
+    OPC_MXU_S8LDI    = 0x24,
+    OPC_MXU_S8SDI    = 0x25,
+    OPC_MXU__POOL16  = 0x26,
+    OPC_MXU__POOL17  = 0x27,
+    OPC_MXU_LXB      = 0x28,
+    /* not assigned 0x29 */
+    OPC_MXU_S16LDD   = 0x2A,
+    OPC_MXU_S16STD   = 0x2B,
+    OPC_MXU_S16LDI   = 0x2C,
+    OPC_MXU_S16SDI   = 0x2D,
+    OPC_MXU_S32M2I   = 0x2E,
+    OPC_MXU_S32I2M   = 0x2F,
+    OPC_MXU_D32SLL   = 0x30,
+    OPC_MXU_D32SLR   = 0x31,
+    OPC_MXU_D32SARL  = 0x32,
+    OPC_MXU_D32SAR   = 0x33,
+    OPC_MXU_Q16SLL   = 0x34,
+    OPC_MXU_Q16SLR   = 0x35,
+    OPC_MXU__POOL18  = 0x36,
+    OPC_MXU_Q16SAR   = 0x37,
+    OPC_MXU__POOL19  = 0x38,
+    OPC_MXU__POOL20  = 0x39,
+    OPC_MXU__POOL21  = 0x3A,
+    OPC_MXU_Q16SCOP  = 0x3B,
+    OPC_MXU_Q8MADL   = 0x3C,
+    OPC_MXU_S32SFL   = 0x3D,
+    OPC_MXU_Q8SAD    = 0x3E,
+    /* not assigned 0x3F */
+};
+
+
+/*
+ * 
MXU pool 00 + */ +enum { + OPC_MXU_S32MAX = 0x00, + OPC_MXU_S32MIN = 0x01, + OPC_MXU_D16MAX = 0x02, + OPC_MXU_D16MIN = 0x03, + OPC_MXU_Q8MAX = 0x04, + OPC_MXU_Q8MIN = 0x05, + OPC_MXU_Q8SLT = 0x06, + OPC_MXU_Q8SLTU = 0x07, +}; + +/* + * MXU pool 01 + */ +enum { + OPC_MXU_S32SLT = 0x00, + OPC_MXU_D16SLT = 0x01, + OPC_MXU_D16AVG = 0x02, + OPC_MXU_D16AVGR = 0x03, + OPC_MXU_Q8AVG = 0x04, + OPC_MXU_Q8AVGR = 0x05, + OPC_MXU_Q8ADD = 0x07, +}; + +/* + * MXU pool 02 + */ +enum { + OPC_MXU_S32CPS = 0x00, + OPC_MXU_D16CPS = 0x02, + OPC_MXU_Q8ABD = 0x04, + OPC_MXU_Q16SAT = 0x06, +}; + +/* + * MXU pool 03 + */ +enum { + OPC_MXU_D16MULF = 0x00, + OPC_MXU_D16MULE = 0x01, +}; + +/* + * MXU pool 04 + */ +enum { + OPC_MXU_S16MAD = 0x00, + OPC_MXU_S16MAD_1 = 0x01, +}; + +/* + * MXU pool 05 + */ +enum { + OPC_MXU_S32LDD = 0x00, + OPC_MXU_S32LDDR = 0x01, +}; + +/* + * MXU pool 06 + */ +enum { + OPC_MXU_S32STD = 0x00, + OPC_MXU_S32STDR = 0x01, +}; + +/* + * MXU pool 07 + */ +enum { + OPC_MXU_S32LDDV = 0x00, + OPC_MXU_S32LDDVR = 0x01, +}; + +/* + * MXU pool 08 + */ +enum { + OPC_MXU_S32STDV = 0x00, + OPC_MXU_S32STDVR = 0x01, +}; + +/* + * MXU pool 09 + */ +enum { + OPC_MXU_S32LDI = 0x00, + OPC_MXU_S32LDIR = 0x01, +}; + +/* + * MXU pool 10 + */ +enum { + OPC_MXU_S32SDI = 0x00, + OPC_MXU_S32SDIR = 0x01, +}; + +/* + * MXU pool 11 + */ +enum { + OPC_MXU_S32LDIV = 0x00, + OPC_MXU_S32LDIVR = 0x01, +}; + +/* + * MXU pool 12 + */ +enum { + OPC_MXU_S32SDIV = 0x00, + OPC_MXU_S32SDIVR = 0x01, +}; + +/* + * MXU pool 13 + */ +enum { + OPC_MXU_D32ACC = 0x00, + OPC_MXU_D32ACCM = 0x01, + OPC_MXU_D32ASUM = 0x02, +}; + +/* + * MXU pool 14 + */ +enum { + OPC_MXU_Q16ACC = 0x00, + OPC_MXU_Q16ACCM = 0x01, + OPC_MXU_Q16ASUM = 0x02, +}; + +/* + * MXU pool 15 + */ +enum { + OPC_MXU_Q8ADDE = 0x00, + OPC_MXU_D8SUM = 0x01, + OPC_MXU_D8SUMC = 0x02, +}; + +/* + * MXU pool 16 + */ +enum { + OPC_MXU_S32MUL = 0x00, + OPC_MXU_S32MULU = 0x01, + OPC_MXU_S32EXTR = 0x02, + OPC_MXU_S32EXTRV = 0x03, +}; + +/* + * MXU pool 17 + */ +enum { + OPC_MXU_D32SARW = 0x00, + OPC_MXU_S32ALN = 0x01, + OPC_MXU_S32ALNI = 0x02, + OPC_MXU_S32NOR = 0x03, + OPC_MXU_S32AND = 0x04, + OPC_MXU_S32OR = 0x05, + OPC_MXU_S32XOR = 0x06, + OPC_MXU_S32LUI = 0x07, +}; + +/* + * MXU pool 18 + */ +enum { + OPC_MXU_D32SLLV = 0x00, + OPC_MXU_D32SLRV = 0x01, + OPC_MXU_D32SARV = 0x03, + OPC_MXU_Q16SLLV = 0x04, + OPC_MXU_Q16SLRV = 0x05, + OPC_MXU_Q16SARV = 0x07, +}; + +/* + * MXU pool 19 + */ +enum { + OPC_MXU_Q8MUL = 0x00, + OPC_MXU_Q8MULSU = 0x01, +}; + +/* + * MXU pool 20 + */ +enum { + OPC_MXU_Q8MOVZ = 0x00, + OPC_MXU_Q8MOVN = 0x01, + OPC_MXU_D16MOVZ = 0x02, + OPC_MXU_D16MOVN = 0x03, + OPC_MXU_S32MOVZ = 0x04, + OPC_MXU_S32MOVN = 0x05, +}; + +/* + * MXU pool 21 + */ +enum { + OPC_MXU_Q8MAC = 0x00, + OPC_MXU_Q8MACSU = 0x01, +}; + + /* global register indices */ static TCGv cpu_gpr[32], cpu_PC; static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC]; @@ -1447,8 +1986,9 @@ typedef struct DisasContext { target_ulong saved_pc; target_ulong page_start; uint32_t opcode; - int insn_flags; + uint64_t insn_flags; int32_t CP0_Config1; + int32_t CP0_Config2; int32_t CP0_Config3; int32_t CP0_Config5; /* Routine used to access memory */ @@ -1857,9 +2397,20 @@ static inline void check_dsp(DisasContext *ctx) } } -static inline void check_dspr2(DisasContext *ctx) +static inline void check_dsp_r2(DisasContext *ctx) { - if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) { + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R2))) { + if (ctx->insn_flags & ASE_DSP) { + generate_exception_end(ctx, EXCP_DSPDIS); + } 
else { + generate_exception_end(ctx, EXCP_RI); + } + } +} + +static inline void check_dsp_r3(DisasContext *ctx) +{ + if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R3))) { if (ctx->insn_flags & ASE_DSP) { generate_exception_end(ctx, EXCP_DSPDIS); } else { @@ -1870,7 +2421,7 @@ static inline void check_dspr2(DisasContext *ctx) /* This code generates a "reserved instruction" exception if the CPU does not support the instruction set corresponding to flags. */ -static inline void check_insn(DisasContext *ctx, int flags) +static inline void check_insn(DisasContext *ctx, uint64_t flags) { if (unlikely(!(ctx->insn_flags & flags))) { generate_exception_end(ctx, EXCP_RI); @@ -1880,7 +2431,7 @@ static inline void check_insn(DisasContext *ctx, int flags) /* This code generates a "reserved instruction" exception if the CPU has corresponding flag set which indicates that the instruction has been removed. */ -static inline void check_insn_opc_removed(DisasContext *ctx, int flags) +static inline void check_insn_opc_removed(DisasContext *ctx, uint64_t flags) { if (unlikely(ctx->insn_flags & flags)) { generate_exception_end(ctx, EXCP_RI); @@ -1927,6 +2478,19 @@ static inline void check_xnp(DisasContext *ctx) } } +#ifndef CONFIG_USER_ONLY +/* + * This code generates a "reserved instruction" exception if the + * Config3 PW bit is NOT set. + */ +static inline void check_pw(DisasContext *ctx) +{ + if (unlikely(!(ctx->CP0_Config3 & (1 << CP0C3_PW)))) { + generate_exception_end(ctx, EXCP_RI); + } +} +#endif + /* * This code generates a "reserved instruction" exception if the * Config3 MT bit is NOT set. @@ -5537,6 +6101,21 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ext32s_tl(arg, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -5572,6 +6151,11 @@ static void gen_mfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6238,6 +6822,21 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -6273,6 +6872,11 @@ static void gen_mtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -6948,6 +7552,21 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_SegCtl2)); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + 
tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWField)); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWSize)); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -6983,6 +7602,11 @@ static void gen_dmfc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4)); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl)); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -7631,6 +8255,21 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_segctl2(cpu_env, arg); rn = "SegCtl2"; break; + case 5: + check_pw(ctx); + tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase)); + rn = "PWBase"; + break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwfield(cpu_env, arg); + rn = "PWField"; + break; + case 7: + check_pw(ctx); + gen_helper_mtc0_pwsize(cpu_env, arg); + rn = "PWSize"; + break; default: goto cp0_unimplemented; } @@ -7666,6 +8305,11 @@ static void gen_dmtc0(DisasContext *ctx, TCGv arg, int reg, int sel) gen_helper_mtc0_srsconf4(cpu_env, arg); rn = "SRSConf4"; break; + case 6: + check_pw(ctx); + gen_helper_mtc0_pwctl(cpu_env, arg); + rn = "PWCtl"; + break; default: goto cp0_unimplemented; } @@ -14999,15 +15643,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) case 0x38: /* cmovs */ switch ((ctx->opcode >> 6) & 0x7) { - case MOVN_FMT: /* SELNEZ_FMT */ + case MOVN_FMT: /* SELEQZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELNEZ_FMT */ + /* SELEQZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -15021,15 +15665,15 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) check_insn_opc_removed(ctx, ISA_MIPS32R6); FINSN_3ARG_SDPS(MOVN); break; - case MOVZ_FMT: /* SELEQZ_FMT */ + case MOVZ_FMT: /* SELNEZ_FMT */ if (ctx->insn_flags & ISA_MIPS32R6) { - /* SELEQZ_FMT */ + /* SELNEZ_FMT */ switch ((ctx->opcode >> 9) & 0x3) { case FMT_SDPS_S: - gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs); + gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs); break; case FMT_SDPS_D: - gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs); + gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs); break; default: goto pool32f_invalid; @@ -16488,6 +17132,40 @@ enum { NM_P_SC = 0x0b, }; +/* P.LS.E0 instruction pool */ +enum { + NM_LBE = 0x00, + NM_SBE = 0x01, + NM_LBUE = 0x02, + NM_P_PREFE = 0x03, + NM_LHE = 0x04, + NM_SHE = 0x05, + NM_LHUE = 0x06, + NM_CACHEE = 0x07, + NM_LWE = 0x08, + NM_SWE = 0x09, + NM_P_LLE = 0x0a, + NM_P_SCE = 0x0b, +}; + +/* P.PREFE instruction pool */ +enum { + NM_SYNCIE = 0x00, + NM_PREFE = 0x01, +}; + +/* P.LLE instruction pool */ +enum { + NM_LLE = 0x00, + NM_LLWPE = 0x01, +}; + +/* P.SCE instruction pool */ +enum { + NM_SCE = 0x00, + NM_SCWPE = 0x01, +}; + /* P.LS.WM instruction pool */ enum { NM_LWM = 0x00, @@ -17444,7 +18122,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, case NM_POOL32AXF_2_0_7: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPAQ_S_W_PH: @@ -17452,7 +18130,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, 
gen_helper_dpaq_s_w_ph(t0, v1, v0, cpu_env); break; case NM_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSQ_S_W_PH: @@ -17467,7 +18145,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, case NM_POOL32AXF_2_8_15: switch (extract32(ctx->opcode, 9, 3)) { case NM_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v0, v1, cpu_env); break; case NM_DPAQ_SA_L_W: @@ -17475,7 +18153,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpaq_sa_l_w(t0, v0, v1, cpu_env); break; case NM_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSQ_SA_L_W: @@ -17494,7 +18172,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_DPSU_H_QBL: @@ -17502,11 +18180,11 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbl(t0, v0, v1, cpu_env); break; case NM_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v0, v1, cpu_env); break; case NM_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v0, v1, cpu_env); break; default: @@ -17521,7 +18199,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpau_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_DPSU_H_QBR: @@ -17529,7 +18207,7 @@ static void gen_pool32axf_2_multiply(DisasContext *ctx, uint32_t opc, gen_helper_dpsu_h_qbr(t0, v1, v0, cpu_env); break; case NM_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1, v0, cpu_env); break; case NM_MULSAQ_S_W_PH: @@ -17571,7 +18249,7 @@ static void gen_pool32axf_2_nanomips_insn(DisasContext *ctx, uint32_t opc, gen_pool32axf_2_multiply(ctx, opc, v0_t, v1_t, rd); break; case NM_BALIGN: - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt != 0) { gen_load_gpr(t0, rs); rd &= 3; @@ -17801,7 +18479,7 @@ static void gen_pool32axf_4_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(v0_t, v0_t, cpu_env); gen_store_gpr(v0_t, ret); break; @@ -17940,7 +18618,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, switch (opc) { case NM_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 2); switch (extract32(ctx->opcode, 12, 1)) { case 0: @@ -17956,7 +18634,7 @@ static void gen_pool32axf_7_nanomips_insn(DisasContext *ctx, uint32_t opc, } break; case NM_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); tcg_gen_movi_tl(t0, rd >> 1); gen_helper_shrl_ph(t0, t0, rs_t); gen_store_gpr(t0, rt); @@ -18881,19 +19559,19 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); break; case NM_CMPGDU_LE_QB: - 
check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(v1_t, v1_t, v2_t); tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4); gen_store_gpr(v1_t, ret); @@ -18949,7 +19627,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_PH */ @@ -18964,7 +19642,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDQH_W */ @@ -18994,7 +19672,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDU_PH */ @@ -19009,7 +19687,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_ADDUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* ADDUH_QB */ @@ -19039,7 +19717,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SHRAV_QB */ @@ -19069,7 +19747,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_PH */ @@ -19084,7 +19762,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBQH_R_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBQH_W */ @@ -19114,7 +19792,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBU_PH */ @@ -19129,7 +19807,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_SUBUH_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* SUBUH_QB */ @@ -19159,7 +19837,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* PRECR_SRA_PH_W */ @@ -19199,22 +19877,22 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_RS_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_rs_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_MULQ_S_W: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_w(v1_t, v1_t, v2_t, cpu_env); gen_store_gpr(v1_t, ret); break; case NM_APPEND: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_load_gpr(t0, rs); if (rd != 0) { tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], rd, 32 - rd); @@ -19232,7 +19910,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(v1_t, v1_t, v2_t); gen_store_gpr(v1_t, ret); break; @@ -19274,7 +19952,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, gen_store_gpr(v1_t, ret); break; case NM_MUL_S_PH: - check_dspr2(ctx); + 
check_dsp_r2(ctx); switch (extract32(ctx->opcode, 10, 1)) { case 0: /* MUL_PH */ @@ -19289,7 +19967,7 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, } break; case NM_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(v1_t, v1_t, v2_t); gen_store_gpr(v1_t, ret); break; @@ -19326,8 +20004,8 @@ static void gen_pool32a5_nanomips_insn(DisasContext *ctx, int opc, case 0: /* SHRA_PH */ gen_helper_shra_ph(v1_t, t0, v1_t); - break; gen_store_gpr(v1_t, rt); + break; case 1: /* SHRA_R_PH */ gen_helper_shra_r_ph(v1_t, t0, v1_t); @@ -20098,7 +20776,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) gen_compute_branch_cp1_nm(ctx, OPC_BC1NEZ, rt, s); break; case NM_BPOSGE32C: - check_dspr2(ctx); + check_dsp_r3(ctx); { int32_t imm = extract32(ctx->opcode, 1, 13) | extract32(ctx->opcode, 0, 1) << 13; @@ -20607,7 +21285,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, switch (op1) { /* OPC_MULT_G_2E is equal OPC_ADDUH_QB_DSP */ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_ADDUH_QB: gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t); @@ -20650,7 +21328,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_ABSQ_S_PH_DSP: switch (op2) { case OPC_ABSQ_S_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PH: @@ -20729,11 +21407,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBQ_PH: @@ -20757,11 +21435,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDSC: @@ -20785,7 +21463,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_QB_DSP: switch (op2) { case OPC_PRECR_QB_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECRQ_QB_PH: @@ -20793,7 +21471,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20802,7 +21480,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_PH_W: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_t = tcg_const_i32(v2); gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t, @@ -20884,7 +21562,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t); break; case OPC_ABSQ_S_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env); break; case OPC_ABSQ_S_PW: @@ -20928,19 +21606,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, 
uint32_t op1, uint32_t op2, gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_SUBUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SUBUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDQ_PW: @@ -20968,19 +21646,19 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDU_S_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_ADDUH_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t); break; case OPC_ADDUH_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_adduh_r_ob(cpu_gpr[ret], v1_t, v2_t); break; } @@ -20988,11 +21666,11 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, case OPC_CMPU_EQ_OB_DSP: switch (op2) { case OPC_PRECR_OB_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t); break; case OPC_PRECR_SRA_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 ret_t = tcg_const_i32(ret); gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t); @@ -21000,7 +21678,7 @@ static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, break; } case OPC_PRECR_SRA_R_QH_PW: - check_dspr2(ctx); + check_dsp_r2(ctx); { TCGv_i32 sa_v = tcg_const_i32(ret); gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v); @@ -21103,27 +21781,27 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRL_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRLV_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRA_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t); break; case OPC_SHRAV_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRAV_R_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_SHRA_PH: @@ -21202,19 +21880,19 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env); break; case OPC_SHRA_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRAV_R_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRA_PW: @@ -21258,11 +21936,11 @@ static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc, gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t); break; case OPC_SHRL_QH: - 
check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0); break; case OPC_SHRLV_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t); break; default: /* Invalid */ @@ -21303,7 +21981,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. */ case OPC_MULT_G_2E: - check_dspr2(ctx); + check_dsp_r2(ctx); switch (op2) { case OPC_MUL_PH: gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); @@ -21338,11 +22016,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env); break; case OPC_DPA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQ_S_W_PH: @@ -21350,19 +22028,19 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPAQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPS_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSX_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQ_S_W_PH: @@ -21370,11 +22048,11 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_S_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_DPSQX_SA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSAQ_S_W_PH: @@ -21406,7 +22084,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env); break; case OPC_MULSA_W_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env); break; } @@ -21435,7 +22113,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env); break; case OPC_DPA_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPAQ_S_W_QH: @@ -21455,7 +22133,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env); break; case OPC_DPS_W_QH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env); break; case OPC_DPSQ_S_W_QH: @@ -21549,7 +22227,7 @@ static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2, gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_MULQ_S_PH: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; } @@ -21773,7 +22451,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmpgu_le_qb(cpu_gpr[ret], v1_t, v2_t); break; case OPC_CMPGDU_EQ_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_eq_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ 
-21781,7 +22459,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LT_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_lt_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21789,7 +22467,7 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1); break; case OPC_CMPGDU_LE_QB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgu_le_qb(t1, v1_t, v2_t); tcg_gen_mov_tl(cpu_gpr[ret], t1); tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF); @@ -21850,15 +22528,15 @@ static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx, gen_helper_cmp_le_qh(v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_EQ_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LT_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGDU_LE_OB: - check_dspr2(ctx); + check_dsp_r2(ctx); gen_helper_cmpgdu_le_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env); break; case OPC_CMPGU_EQ_OB: @@ -21916,7 +22594,7 @@ static void gen_mipsdsp_append(CPUMIPSState *env, DisasContext *ctx, { TCGv t0; - check_dspr2(ctx); + check_dsp_r2(ctx); if (rt == 0) { /* Treat as NOP. */ @@ -22801,7 +23479,7 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2E: /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have * the same mask and op1. */ - if ((ctx->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) { + if ((ctx->insn_flags & ASE_DSP_R2) && (op1 == OPC_MULT_G_2E)) { op2 = MASK_ADDUH_QB(ctx->opcode); switch (op2) { case OPC_ADDUH_QB: @@ -25285,6 +25963,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->saved_pc = -1; ctx->insn_flags = env->insn_flags; ctx->CP0_Config1 = env->CP0_Config1; + ctx->CP0_Config2 = env->CP0_Config2; ctx->CP0_Config3 = env->CP0_Config3; ctx->CP0_Config5 = env->CP0_Config5; ctx->btarget = 0; @@ -25799,6 +26478,24 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Status |= (1 << CP0St_FR); } + if (env->insn_flags & ISA_MIPS32R6) { + /* PTW = 1 */ + env->CP0_PWSize = 0x40; + /* GDI = 12 */ + /* UDI = 12 */ + /* MDI = 12 */ + /* PRI = 12 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x0C30C302; + } else { + /* GDI = 0 */ + /* UDI = 0 */ + /* MDI = 0 */ + /* PRI = 0 */ + /* PTEI = 2 */ + env->CP0_PWField = 0x02; + } + if (env->CP0_Config3 & (1 << CP0C3_ISA) & (1 << (CP0C3_ISA + 1))) { /* microMIPS on reset when Config3.ISA is 3 */ env->hflags |= MIPS_HFLAG_M16; diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c index b3320b9dc7..acab097820 100644 --- a/target/mips/translate_init.inc.c +++ b/target/mips/translate_init.inc.c @@ -320,7 +320,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, { @@ -485,7 +485,8 @@ const mips_def_t mips_defs[] = .CP1_fcr31 = (1 << FCR31_ABS2008) | (1 << FCR31_NAN2008), .SEGBITS = 32, .PABITS = 32, - .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSPR2 | ASE_MT, + .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSP_R2 | ASE_DSP_R3 | + ASE_MT, .mmu_type = MMU_TYPE_R4000, }, #if defined(TARGET_MIPS64) @@ -761,7 +762,7 @@ const mips_def_t mips_defs[] = .mmu_type = 
MMU_TYPE_R4000, }, { - /* A generic CPU providing MIPS64 ASE DSP 2 features. + /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. */ .name = "mips64dspr2", .CP0_PRid = 0x00010000, @@ -786,7 +787,7 @@ const mips_def_t mips_defs[] = .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .SEGBITS = 42, .PABITS = 36, - .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2, + .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSP_R2, .mmu_type = MMU_TYPE_R4000, }, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ef64248bc4..7a1481fd0b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 8f0d86d104..a1485fad9b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "tcg.h" #include "internal.h" +#include "qemu/atomic128.h" //#define DEBUG_OP @@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, return i; } -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) +#ifdef TARGET_PPC64 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, uint32_t opidx) { - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); + Int128 ret; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); env->retxh = int128_gethi(ret); return int128_getlo(ret); } @@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); } void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, uint64_t lo, uint64_t hi, uint32_t opidx) { - Int128 val = int128_make128(lo, hi); + Int128 val; + + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_ATOMIC128); + val = int128_make128(lo, hi); helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); } @@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. 
*/ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; @@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, { bool success = false; + /* We will have raised EXCP_ATOMIC from the translator. */ + assert(HAVE_CMPXCHG128); + if (likely(addr == env->reserve_addr)) { Int128 oldv, cmpv, newv; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 881743571b..4e59dd5f42 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -33,6 +33,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" #define CPU_SINGLE_STEP 0x1 @@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } - tcg_temp_free_i32(oi); -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); gen_addr_add(ctx, EA, EA, 8); @@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx) hi = cpu_gpr[rd]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { -#ifdef CONFIG_ATOMIC128 - TCGv_i32 oi = tcg_temp_new_i32(); - if (ctx->le_mode) { - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + if (HAVE_ATOMIC128) { + TCGv_i32 oi = tcg_temp_new_i32(); + if (ctx->le_mode) { + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); + } else { + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, + ctx->mem_idx)); + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + } + tcg_temp_free_i32(oi); + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); } else { - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, - ctx->mem_idx)); - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; + tcg_temp_free(EA); + return; } - tcg_temp_free_i32(oi); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); -#else - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; - tcg_temp_free(EA); - return; -#endif } else if (ctx->le_mode) { tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); tcg_gen_mov_tl(cpu_reserve, EA); @@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx) hi = cpu_gpr[rs]; if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); -#ifdef CONFIG_ATOMIC128 - if (ctx->le_mode) { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + if (HAVE_CMPXCHG128) { + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); + if (ctx->le_mode) { + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } else { + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, + EA, lo, hi, oi); + } + tcg_temp_free_i32(oi); } else { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); + /* Restart with exclusive lock. */ + gen_helper_exit_atomic(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; } -#else - /* Restart with exclusive lock. 
*/ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; -#endif tcg_temp_free(EA); - tcg_temp_free_i32(oi); } else { TCGLabel *lab_fail = gen_new_label(); TCGLabel *lab_over = gen_new_label(); diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index bacae4f503..490c43e6e6 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "qemu/int128.h" +#include "qemu/atomic128.h" #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" @@ -1379,65 +1380,62 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -static void do_cdsg(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3, bool parallel) +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) { uintptr_t ra = GETPC(); Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 oldv; + uint64_t oldh, oldl; bool fail; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); -#endif - } else { - uint64_t oldh, oldl; + check_alignment(env, addr, 16, ra); - check_alignment(env, addr, 16, ra); + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; } + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + env->cc_op = fail; env->regs[r1] = int128_gethi(oldv); env->regs[r1 + 1] = int128_getlo(oldv); } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - do_cdsg(env, addr, r1, r3, false); -} - void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, uint32_t r1, uint32_t r3) { - do_cdsg(env, addr, r1, r3, true); + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); } static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) uint32_t mem_idx = cpu_mmu_index(env, false); -#endif uintptr_t ra = GETPC(); uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t sc = extract32(env->regs[0], 8, 8); @@ -1465,18 +1463,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, probe_write(env, a2, 0, mem_idx, ra); #endif - /* Note that the compare-and-swap is atomic, and the store is atomic, but 
- the complete operation is not. Therefore we do not need to assert serial - context in order to implement this. That said, restart early if we can't - support either operation that is supposed to be atomic. */ + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ if (parallel) { - int mask = 0; -#if !defined(CONFIG_ATOMIC64) - mask = -8; -#elif !defined(CONFIG_ATOMIC128) - mask = -16; + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; #endif - if (((4 << fc) | (1 << sc)) & mask) { + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); } } @@ -1546,16 +1546,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 ov; - if (parallel) { -#ifdef CONFIG_ATOMIC128 - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); - cc = !int128_eq(ov, cv); -#else - /* Note that we asserted !parallel above. */ - g_assert_not_reached(); -#endif - } else { + if (!parallel) { uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); @@ -1567,6 +1558,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); } env->regs[r3 + 0] = int128_gethi(ov); @@ -1596,18 +1594,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, cpu_stq_data_ra(env, a2, svh, ra); break; case 4: - if (parallel) { -#ifdef CONFIG_ATOMIC128 + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); Int128 sv = int128_make128(svl, svh); helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); -#else + } else { /* Note that we asserted !parallel above. 
*/ g_assert_not_reached(); -#endif - } else { - cpu_stq_data_ra(env, a2 + 0, svh, ra); - cpu_stq_data_ra(env, a2 + 8, svl, ra); } break; default: @@ -2100,76 +2096,64 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) #endif /* load pair from quadword */ -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) { uintptr_t ra = GETPC(); uint64_t hi, lo; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); - hi = int128_gethi(v); - lo = int128_getlo(v); -#endif - } else { - check_alignment(env, addr, 16, ra); - - hi = cpu_ldq_data_ra(env, addr + 0, ra); - lo = cpu_ldq_data_ra(env, addr + 8, ra); - } + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); env->retxl = lo; return hi; } -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) -{ - return do_lpq(env, addr, false); -} - uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) { - return do_lpq(env, addr, true); -} - -/* store pair to quadword */ -static void do_stpq(CPUS390XState *env, uint64_t addr, - uint64_t low, uint64_t high, bool parallel) -{ uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; - if (parallel) { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + assert(HAVE_ATOMIC128); - Int128 v = int128_make128(low, high); - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); -#endif - } else { - check_alignment(env, addr, 16, ra); + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); - cpu_stq_data_ra(env, addr + 0, high, ra); - cpu_stq_data_ra(env, addr + 8, low, ra); - } + env->retxl = lo; + return hi; } +/* store pair to quadword */ void HELPER(stpq)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, false); + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); } void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, uint64_t low, uint64_t high) { - do_stpq(env, addr, low, high, true); + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); } /* Execute instruction. This instruction executes an insn modified with diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 18861cd186..b5bd56b7ee 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -44,6 +44,7 @@ #include "trace-tcg.h" #include "exec/translator.h" #include "exec/log.h" +#include "qemu/atomic128.h" /* Information that (most) every instruction needs to manipulate. */ @@ -1128,11 +1129,19 @@ struct DisasInsn { const char *name; + /* Pre-process arguments before HELP_OP. 
*/ void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. */ DisasJumpType (*help_op)(DisasContext *, DisasOps *); uint64_t data; @@ -2032,6 +2041,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) int r3 = get_field(s->fields, r3); int d2 = get_field(s->fields, d2); int b2 = get_field(s->fields, b2); + DisasJumpType ret = DISAS_NEXT; TCGv_i64 addr; TCGv_i32 t_r1, t_r3; @@ -2039,17 +2049,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) addr = get_address(s, 0, b2, d2); t_r1 = tcg_const_i32(r1); t_r3 = tcg_const_i32(r3); - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); } else { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; } tcg_temp_free_i64(addr); tcg_temp_free_i32(t_r1); tcg_temp_free_i32(t_r3); set_cc_static(s); - return DISAS_NEXT; + return ret; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -3026,10 +3039,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { gen_helper_lpq_parallel(o->out, cpu_env, o->in2); } else { - gen_helper_lpq(o->out, cpu_env, o->in2); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return_low128(o->out2); return DISAS_NEXT; @@ -4406,10 +4422,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) { - if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); } else { - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; } return DISAS_NEXT; } @@ -6125,11 +6144,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (insn->help_op) { ret = insn->help_op(s, &o); } - if (insn->help_wout) { - insn->help_wout(s, &f, &o); - } - if (insn->help_cout) { - insn->help_cout(s, &o); + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &f, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } } /* Free any temporaries created by the helpers. 
*/ diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c index 68f978d80b..2b49d1ca40 100644 --- a/target/unicore32/cpu.c +++ b/target/unicore32/cpu.c @@ -116,8 +116,6 @@ static void uc32_cpu_initfn(Object *obj) env->uncached_asr = ASR_MODE_PRIV; env->regs[31] = 0x03000000; #endif - - tlb_flush(cs); } static const VMStateDescription vmstate_uc32_cpu = { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index daa416a143..7a8015c5a9 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2586,6 +2586,10 @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx) seen this numbered exit before, via tcg_gen_goto_tb. */ tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx)); #endif + /* When not chaining, exit without indicating a link. */ + if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + val = 0; + } } else { /* This is an exit via the exitreq label. */ tcg_debug_assert(idx == TB_EXIT_REQUESTED); @@ -2603,7 +2607,10 @@ void tcg_gen_goto_tb(unsigned idx) tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0); tcg_ctx->goto_tb_issue_mask |= 1 << idx; #endif - tcg_gen_op1i(INDEX_op_goto_tb, idx); + /* When not chaining, we simply fall through to the "fallback" exit. */ + if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + tcg_gen_op1i(INDEX_op_goto_tb, idx); + } } void tcg_gen_lookup_and_goto_ptr(void) @@ -30,6 +30,7 @@ /* Define to jump the ELF file used to communicate with GDB. */ #undef DEBUG_JIT +#include "qemu/error-report.h" #include "qemu/cutils.h" #include "qemu/host-utils.h" #include "qemu/timer.h" @@ -3361,6 +3362,7 @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) const TCGProfile *orig = &s->prof; if (counters) { + PROF_ADD(prof, orig, cpu_exec_time); PROF_ADD(prof, orig, tb_count1); PROF_ADD(prof, orig, tb_count); PROF_ADD(prof, orig, op_count); @@ -3412,11 +3414,32 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) prof.table_op_count[i]); } } + +int64_t tcg_cpu_exec_time(void) +{ + unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); + unsigned int i; + int64_t ret = 0; + + for (i = 0; i < n_ctxs; i++) { + const TCGContext *s = atomic_read(&tcg_ctxs[i]); + const TCGProfile *prof = &s->prof; + + ret += atomic_read(&prof->cpu_exec_time); + } + return ret; +} #else void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) { cpu_fprintf(f, "[TCG profiler not compiled]\n"); } + +int64_t tcg_cpu_exec_time(void) +{ + error_report("%s: TCG profiler not compiled", __func__); + exit(EXIT_FAILURE); +} #endif @@ -3430,7 +3453,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef CONFIG_PROFILER { - int n; + int n = 0; QTAILQ_FOREACH(op, &s->ops, link) { n++; @@ -32,6 +32,7 @@ #include "qemu/queue.h" #include "tcg-mo.h" #include "tcg-target.h" +#include "qemu/int128.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -629,12 +630,13 @@ typedef struct TCGOp { QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); typedef struct TCGProfile { + int64_t cpu_exec_time; int64_t tb_count1; int64_t tb_count; int64_t op_count; /* total insn count */ int op_count_max; /* max insn per TB */ - int64_t temp_count; int temp_count_max; + int64_t temp_count; int64_t del_op_count; int64_t code_in_len; int64_t code_out_len; @@ -1002,6 +1004,7 @@ int tcg_check_temp_count(void); #define tcg_check_temp_count() 0 #endif +int64_t tcg_cpu_exec_time(void); void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf); @@ -1454,11 +1457,14 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef GEN_ATOMIC_HELPER #endif /* 
CONFIG_SOFTMMU */ -#ifdef CONFIG_ATOMIC128 -#include "qemu/int128.h" - -/* These aren't really a "proper" helpers because TCG cannot manage Int128. - However, use the same format as the others, for use by the backends. */ +/* + * These aren't really a "proper" helpers because TCG cannot manage Int128. + * However, use the same format as the others, for use by the backends. + * + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; + * the ld/st functions are only defined if HAVE_ATOMIC128, + * as defined by <qemu/atomic128.h>. + */ Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, Int128 cmpv, Int128 newv, TCGMemOpIdx oi, uintptr_t retaddr); @@ -1475,6 +1481,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, TCGMemOpIdx oi, uintptr_t retaddr); -#endif /* CONFIG_ATOMIC128 */ - #endif /* TCG_H */ @@ -4399,8 +4399,6 @@ int main(int argc, char **argv, char **envp) #endif } - colo_info_init(); - if (net_init_clients(&err) < 0) { error_report_err(err); exit(1);
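
The long run of check_dspr2 -> check_dsp_r2 renames in the MIPS hunks above (together with ASE_DSPR2 -> ASE_DSP_R2 and the newly distinguished ASE_DSP_R3) makes each guard name match the DSP ASE revision it tests. Two behavioural fixes ride along: NM_BPOSGE32C now requires DSP R3 rather than R2, and the SHRA_PH case moves its break below gen_store_gpr(), so a store that used to be unreachable dead code now executes. A minimal, self-contained sketch of the guard pattern follows; only the check_dsp_r2/check_dsp_r3 and ASE_DSP_R2/ASE_DSP_R3 names come from the patch, while the struct, the flag values, and the printf standing in for the reserved-instruction exception are illustrative assumptions.

    #include <stdio.h>

    /* Hypothetical stand-ins for the translator state and exception hook. */
    typedef struct {
        unsigned insn_flags;
    } DisasContext;

    enum { ASE_DSP = 1u << 0, ASE_DSP_R2 = 1u << 1, ASE_DSP_R3 = 1u << 2 };

    static void generate_exception(DisasContext *ctx, const char *exc)
    {
        (void)ctx;
        printf("raise %s\n", exc);   /* stand-in for the RI exception */
    }

    /* One guard per DSP ASE revision, as after the rename. */
    static void check_dsp_r2(DisasContext *ctx)
    {
        if (!(ctx->insn_flags & ASE_DSP_R2)) {
            generate_exception(ctx, "Reserved Instruction");
        }
    }

    static void check_dsp_r3(DisasContext *ctx)
    {
        if (!(ctx->insn_flags & ASE_DSP_R3)) {
            generate_exception(ctx, "Reserved Instruction");
        }
    }

    int main(void)
    {
        DisasContext ctx = { .insn_flags = ASE_DSP | ASE_DSP_R2 };
        check_dsp_r2(&ctx);   /* silent: DSP R2 is present */
        check_dsp_r3(&ctx);   /* raises: NM_BPOSGE32C needs DSP R3 */
        return 0;
    }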
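
The cpu_state_reset() hunk seeds CP0_PWSize and CP0_PWField, and its inline comments (GDI = 12, UDI = 12, MDI = 12, PRI = 12, PTEI = 2) are the decoded form of the 0x0C30C302 constant. The sketch below recomputes both reset values; the field positions (GDI at bits 29:24, UDI at 23:18, MDI at 17:12, PRI at 11:6, PTEI at 5:0) are taken from the MIPS32 PWField layout and are an assumption of the sketch, since the patch itself only states the decoded values.

    #include <assert.h>
    #include <stdint.h>

    /* Assumed MIPS32 PWField layout: GDI 29:24, UDI 23:18, MDI 17:12,
     * PRI 11:6, PTEI 5:0. */
    static uint32_t pwfield(uint32_t gdi, uint32_t udi, uint32_t mdi,
                            uint32_t pri, uint32_t ptei)
    {
        return gdi << 24 | udi << 18 | mdi << 12 | pri << 6 | ptei;
    }

    int main(void)
    {
        assert(pwfield(12, 12, 12, 12, 2) == 0x0C30C302); /* R6 reset value */
        assert(pwfield(0, 0, 0, 0, 2) == 0x02);           /* pre-R6 reset value */
        return 0;
    }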
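
In target/ppc/translate.c, gen_lq, gen_std, gen_lqarx and gen_stqcx_ swap #ifdef CONFIG_ATOMIC128 for an ordinary if on HAVE_ATOMIC128 or HAVE_CMPXCHG128. Since those macros are compile-time constants from qemu/atomic128.h, the dead arm still folds away, but both arms are now always parsed and type-checked; that visibility is also where gen_stqcx_ picks up its endianness fix, the old else branch having called gen_helper_stqcx_le_parallel for big-endian mode too. A standalone sketch of the pattern, in which the __SIZEOF_INT128__ probe is only a stand-in for the real configure test:

    #include <stdio.h>

    /* Stand-in for the configure/atomic128.h machinery: a compile-time
     * constant, not a runtime flag. */
    #if defined(__SIZEOF_INT128__)
    # define HAVE_ATOMIC128 1
    #else
    # define HAVE_ATOMIC128 0
    #endif

    static void gen_16byte_load(void)
    {
        if (HAVE_ATOMIC128) {
            /* Fast path: call the 16-byte atomic load helper. */
            puts("emit call to atomic 16-byte load helper");
        } else {
            /* Slow path: restart the block under the exclusive lock.
             * Both arms are compiled; the dead one folds away. */
            puts("emit exit_atomic: restart with exclusive lock");
        }
    }

    int main(void)
    {
        gen_16byte_load();
        return 0;
    }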
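
On the s390x side, do_cdsg(), do_lpq() and do_stpq() each lose their bool parallel parameter and are split into a plain helper and a _parallel helper. The parallel variant asserts the capability (HAVE_CMPXCHG128 or HAVE_ATOMIC128) that the corresponding translate.c change now verifies before emitting the call; when the host lacks it, the translator emits gen_helper_exit_atomic() instead. A reduced sketch of that split, with printfs standing in for the real memory accesses:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define HAVE_ATOMIC128 1   /* stand-in for qemu/atomic128.h */

    /* Serial context: plain loads are sufficient. */
    static void helper_lpq(uint64_t addr)
    {
        printf("serial 16-byte load at %#llx\n", (unsigned long long)addr);
    }

    /* Parallel context: the translator emits a call to this variant only
     * when HAVE_ATOMIC128 is set, hence an assert rather than a fallback. */
    static void helper_lpq_parallel(uint64_t addr)
    {
        assert(HAVE_ATOMIC128);
        printf("atomic 16-byte load at %#llx\n", (unsigned long long)addr);
    }

    int main(void)
    {
        helper_lpq(0x1000);
        helper_lpq_parallel(0x2000);
        return 0;
    }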
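
do_csst()'s restart check is rewritten in the same spirit: rather than masking a bitmap of operation sizes, it compares log2 widths against the largest width the host can handle atomically. Reading fc + 2 as log2 of the compare-and-swap width in bytes and sc as log2 of the store width matches the surrounding code but is an interpretation, so treat this sketch as illustrative; the max constants and the two-clause test mirror the patch:

    #include <stdbool.h>
    #include <stdio.h>

    #define HAVE_CMPXCHG128 0   /* stand-ins for qemu/atomic128.h */
    #define HAVE_ATOMIC128  0
    #define CONFIG_ATOMIC64     /* assume 8-byte host atomics */

    static bool must_restart(unsigned fc, unsigned sc)
    {
        unsigned max = 2;
    #ifdef CONFIG_ATOMIC64
        max = 3;
    #endif
        /* fc + 2 = log2 of the compare-and-swap width in bytes,
         * sc = log2 of the store width in bytes. */
        return (HAVE_CMPXCHG128 ? 0 : fc + 2 > max)
            || (HAVE_ATOMIC128 ? 0 : sc > max);
    }

    int main(void)
    {
        printf("8-byte CS, 8-byte store: restart=%d\n", must_restart(1, 3));
        printf("16-byte CS, 16-byte store: restart=%d\n", must_restart(2, 4));
        return 0;
    }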
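
Because op_cdsg, op_lpq and op_stpq can now return DISAS_NORETURN, translate_one() in target/s390x/translate.c learns to skip the help_wout/help_cout post-processing when the operation bailed out, exactly the contract the new DisasInsn comments document. A self-contained sketch of the dispatch change (all types here are simplified stand-ins):

    #include <stdio.h>

    typedef enum { DISAS_NEXT, DISAS_NORETURN } DisasJumpType;

    typedef struct {
        DisasJumpType (*help_op)(void);
        void (*help_wout)(void);   /* post-process output after help_op */
        void (*help_cout)(void);   /* post-process condition code */
    } DisasInsn;

    static DisasJumpType translate_one(const DisasInsn *insn)
    {
        DisasJumpType ret = DISAS_NEXT;

        if (insn->help_op) {
            ret = insn->help_op();
        }
        /* wout/cout are not called if help_op returned DISAS_NORETURN. */
        if (ret != DISAS_NORETURN) {
            if (insn->help_wout) {
                insn->help_wout();
            }
            if (insn->help_cout) {
                insn->help_cout();
            }
        }
        return ret;
    }

    static DisasJumpType op_bails_out(void)
    {
        puts("op: exit_atomic, no result to write back");
        return DISAS_NORETURN;
    }

    static void wout(void)
    {
        puts("wout: write result register");
    }

    int main(void)
    {
        const DisasInsn insn = { .help_op = op_bails_out, .help_wout = wout };
        translate_one(&insn);   /* prints only the op line */
        return 0;
    }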
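
The tcg/tcg-op.c hunks move the CPU_LOG_TB_NOCHAIN decision to code-generation time: when chaining is disabled, tcg_gen_goto_tb() emits no goto_tb op at all and tcg_gen_exit_tb() clears the link value, so no per-execution check is needed when blocks are linked. A sketch of that shape, with _sketch suffixes marking the illustrative stand-ins:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN). */
    static bool log_nochain;

    static void tcg_gen_goto_tb_sketch(unsigned idx)
    {
        /* When not chaining, simply fall through to the fallback exit. */
        if (!log_nochain) {
            printf("emit goto_tb %u (patchable direct-jump slot)\n", idx);
        }
    }

    static void tcg_gen_exit_tb_sketch(uintptr_t tb, unsigned idx)
    {
        uintptr_t val = tb | idx;

        /* When not chaining, exit without indicating a link. */
        if (log_nochain) {
            val = 0;
        }
        printf("emit exit_tb %#lx\n", (unsigned long)val);
    }

    int main(void)
    {
        log_nochain = true;
        tcg_gen_goto_tb_sketch(0);          /* emits nothing */
        tcg_gen_exit_tb_sketch(0x1000, 0);  /* emits a link-less exit */
        return 0;
    }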
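
Finally, the CONFIG_PROFILER additions introduce a per-TCGContext cpu_exec_time counter, summed on demand by the new tcg_cpu_exec_time(); when the profiler is compiled out, that function reports an error and exits rather than returning garbage. A sketch of the aggregation, using C11 atomics where QEMU uses atomic_read():

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CTXS 8

    /* One profile per translation context, as in TCGProfile. */
    typedef struct {
        _Atomic int64_t cpu_exec_time;
    } Profile;

    static Profile profiles[MAX_CTXS];
    static _Atomic unsigned n_ctxs;

    /* Sum the per-context counters, reading each one atomically. */
    static int64_t total_cpu_exec_time(void)
    {
        unsigned n = atomic_load_explicit(&n_ctxs, memory_order_relaxed);
        int64_t ret = 0;

        for (unsigned i = 0; i < n; i++) {
            ret += atomic_load_explicit(&profiles[i].cpu_exec_time,
                                        memory_order_relaxed);
        }
        return ret;
    }

    int main(void)
    {
        atomic_store(&n_ctxs, 2);
        atomic_store(&profiles[0].cpu_exec_time, 100);
        atomic_store(&profiles[1].cpu_exec_time, 250);
        printf("total: %lld\n", (long long)total_cpu_exec_time());
        return 0;
    }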