author     Peter Maydell <peter.maydell@linaro.org>    2021-01-07 20:34:05 +0000
committer  Peter Maydell <peter.maydell@linaro.org>    2021-01-07 20:34:05 +0000
commit     e79de63ab1bd1f6550e7b915e433bec1ad1a870a (patch)
tree       ad29060323ecea1b9a0f60d5b08984f310b30e44
parent     470dd6bd360782f5137f7e3376af6a44658eb1d3 (diff)
parent     e5e2e4c73926f6f3c1f5da24a350e4345d5ad232 (diff)
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210107' into staging
Build fix for ppc64 centos7.
Reduce the use of scratch registers for tcg/i386.
Use _aligned_malloc for Win32.
Enable split w^x code gen buffers.
# gpg: Signature made Thu 07 Jan 2021 20:06:38 GMT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth-gitlab/tags/pull-tcg-20210107: (47 commits)
tcg: Constify TCGLabelQemuLdst.raddr
tcg: Constify tcg_code_gen_epilogue
tcg: Remove TCG_TARGET_SUPPORT_MIRROR
tcg/arm: Support split-wx code generation
tcg/mips: Support split-wx code generation
tcg/mips: Do not assert on relocation overflow
accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd
tcg/riscv: Support split-wx code generation
tcg/riscv: Remove branch-over-branch fallback
tcg/riscv: Fix branch range checks
tcg/s390: Support split-wx code generation
tcg/s390: Use tcg_tbrel_diff
tcg/sparc: Support split-wx code generation
tcg/sparc: Use tcg_tbrel_diff
tcg/ppc: Support split-wx code generation
tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB
tcg/ppc: Use tcg_tbrel_diff
tcg: Introduce tcg_tbrel_diff
tcg/tci: Push const down through bytecode reading
disas: Push const down through host disassembly
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
67 files changed, 1033 insertions, 628 deletions
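The heart of the series is the split w^x buffer: instead of one RWX mapping, the JIT buffer is backed by shared memory and mapped twice, writable for the code generator and executable for the CPU thread, with a constant offset between the two views. Below is a minimal, Linux-only sketch of that idea, calling memfd_create(2) directly where QEMU goes through its qemu_memfd_alloc() wrapper; the names and the trimmed error handling are illustrative, not QEMU's actual code.

    /*
     * Sketch: back the JIT buffer with a memfd and map it twice,
     * once PROT_READ|PROT_WRITE for the code generator and once
     * PROT_READ|PROT_EXEC for execution.  The constant offset
     * between the two mappings plays the role of tcg_splitwx_diff.
     */
    #define _GNU_SOURCE
    #include <stddef.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static int alloc_splitwx(size_t size, void **rw, void **rx)
    {
        int fd = memfd_create("jit-demo", 0);   /* illustrative name */
        if (fd < 0 || ftruncate(fd, size) < 0) {
            return -1;
        }
        *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        *rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
        close(fd);               /* both mappings keep the pages alive */
        return (*rw == MAP_FAILED || *rx == MAP_FAILED) ? -1 : 0;
    }

Code is emitted through *rw and executed at the same offset in *rx; the difference between the two views is what the series records in tcg_splitwx_diff, with tcg_splitwx_to_rx()/tcg_splitwx_to_rw() converting between them, and the new flush_idcache_range(rx, rw, len) interface keeping both views in sync on hosts with incoherent instruction caches. The feature is controlled at run time with '-accel tcg,split-wx=on|off' (see the qemu-options.hx hunk below).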
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index fa325bb3d8..e0df9b6a1d 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -154,14 +154,13 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu) * TCG is not considered a security-sensitive part of QEMU so this does not * affect the impact of CFI in environment with high security requirements */ -QEMU_DISABLE_CFI -static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) +static inline TranslationBlock * QEMU_DISABLE_CFI +cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) { CPUArchState *env = cpu->env_ptr; uintptr_t ret; TranslationBlock *last_tb; - int tb_exit; - uint8_t *tb_ptr = itb->tc.ptr; + const void *tb_ptr = itb->tc.ptr; qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, "Trace %d: %p [" @@ -188,11 +187,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) ret = tcg_qemu_tb_exec(env, tb_ptr); cpu->can_do_io = 1; - last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); - tb_exit = ret & TB_EXIT_MASK; - trace_exec_tb_exit(last_tb, tb_exit); + /* + * TODO: Delay swapping back to the read-write region of the TB + * until we actually need to modify the TB. The read-only copy, + * coming from the rx region, shares the same host TLB entry as + * the code that executed the exit_tb opcode that arrived here. + * If we insist on touching both the RX and the RW pages, we + * double the host TLB pressure. + */ + last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK)); + *tb_exit = ret & TB_EXIT_MASK; + + trace_exec_tb_exit(last_tb, *tb_exit); - if (tb_exit > TB_EXIT_IDX1) { + if (*tb_exit > TB_EXIT_IDX1) { /* We didn't start executing this TB (eg because the instruction * counter hit zero); we must restore the guest PC to the address * of the start of the TB. 
@@ -210,7 +218,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) cc->set_pc(cpu, last_tb->pc); } } - return ret; + return last_tb; } #ifndef CONFIG_USER_ONLY @@ -221,6 +229,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, { TranslationBlock *tb; uint32_t cflags = curr_cflags() | CF_NOCACHE; + int tb_exit; if (ignore_icount) { cflags &= ~CF_USE_ICOUNT; @@ -238,7 +247,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); - cpu_tb_exec(cpu, tb); + cpu_tb_exec(cpu, tb, &tb_exit); mmap_lock(); tb_phys_invalidate(tb, -1); @@ -272,6 +281,7 @@ void cpu_exec_step_atomic(CPUState *cpu) uint32_t flags; uint32_t cflags = 1; uint32_t cf_mask = cflags & CF_HASH_MASK; + int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { start_exclusive(); @@ -288,7 +298,7 @@ void cpu_exec_step_atomic(CPUState *cpu) cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); - cpu_tb_exec(cpu, tb); + cpu_tb_exec(cpu, tb, &tb_exit); cpu_exec_exit(cpu); } else { /* @@ -382,7 +392,9 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) if (TCG_TARGET_HAS_direct_jump) { uintptr_t offset = tb->jmp_target_arg[n]; uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; - tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr); + uintptr_t jmp_rx = tc_ptr + offset; + uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; + tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr); } else { tb->jmp_target_arg[n] = addr; } @@ -682,13 +694,10 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, TranslationBlock **last_tb, int *tb_exit) { - uintptr_t ret; int32_t insns_left; trace_exec_tb(tb, tb->pc); - ret = cpu_tb_exec(cpu, tb); - tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); - *tb_exit = ret & TB_EXIT_MASK; + tb = cpu_tb_exec(cpu, tb, tb_exit); if (*tb_exit != TB_EXIT_REQUESTED) { *last_tb = tb; return; diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 1ac0b76515..2eea8c32ee 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -38,6 +38,7 @@ struct TCGState { AccelState parent_obj; bool mttcg_enabled; + int splitwx_enabled; unsigned long tb_size; }; typedef struct TCGState TCGState; @@ -94,6 +95,13 @@ static void tcg_accel_instance_init(Object *obj) TCGState *s = TCG_STATE(obj); s->mttcg_enabled = default_mttcg_enabled(); + + /* If debugging enabled, default "auto on", otherwise off. 
*/ +#ifdef CONFIG_DEBUG_TCG + s->splitwx_enabled = -1; +#else + s->splitwx_enabled = 0; +#endif } bool mttcg_enabled; @@ -102,7 +110,7 @@ static int tcg_init(MachineState *ms) { TCGState *s = TCG_STATE(current_accel()); - tcg_exec_init(s->tb_size * 1024 * 1024); + tcg_exec_init(s->tb_size * 1024 * 1024, s->splitwx_enabled); mttcg_enabled = s->mttcg_enabled; /* @@ -179,6 +187,18 @@ static void tcg_set_tb_size(Object *obj, Visitor *v, s->tb_size = value; } +static bool tcg_get_splitwx(Object *obj, Error **errp) +{ + TCGState *s = TCG_STATE(obj); + return s->splitwx_enabled; +} + +static void tcg_set_splitwx(Object *obj, bool value, Error **errp) +{ + TCGState *s = TCG_STATE(obj); + s->splitwx_enabled = value; +} + static void tcg_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); @@ -196,6 +216,10 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "tb-size", "TCG translation block cache size"); + object_class_property_add_bool(oc, "split-wx", + tcg_get_splitwx, tcg_set_splitwx); + object_class_property_set_description(oc, "split-wx", + "Map jit pages into separate RW and RX regions"); } static const TypeInfo tcg_accel_type = { diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 446465a09a..d736f4ff55 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -145,7 +145,7 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } -void *HELPER(lookup_tb_ptr)(CPUArchState *env) +const void *HELPER(lookup_tb_ptr)(CPUArchState *env) { CPUState *cpu = env_cpu(env); TranslationBlock *tb; @@ -154,7 +154,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env) tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags()); if (tb == NULL) { - return tcg_ctx->code_gen_epilogue; + return tcg_code_gen_epilogue; } qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, "Chain %d: %p [" diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 2e36d6eb0c..91a5b7e85f 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -24,7 +24,7 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) +DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, cptr, env) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events index 385b9f749b..6eefb37f5d 100644 --- a/accel/tcg/trace-events +++ b/accel/tcg/trace-events @@ -7,4 +7,4 @@ exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=0x%x" # translate-all.c -translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" +translate_block(void *tb, uintptr_t pc, const void *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a1803a1026..e9de6ff9dd 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -59,6 +59,7 @@ #include "sysemu/cpus.h" #include "sysemu/cpu-timers.h" #include "sysemu/tcg.h" +#include "qapi/error.h" /* #define DEBUG_TB_INVALIDATE */ /* #define DEBUG_TB_FLUSH */ @@ -269,9 +270,9 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val) /* Decode a signed leb128 sequence at *PP; increment *PP past the decoded value. Return the decoded value. 
*/ -static target_long decode_sleb128(uint8_t **pp) +static target_long decode_sleb128(const uint8_t **pp) { - uint8_t *p = *pp; + const uint8_t *p = *pp; target_long val = 0; int byte, shift = 0; @@ -342,7 +343,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; uintptr_t host_pc = (uintptr_t)tb->tc.ptr; CPUArchState *env = cpu->env_ptr; - uint8_t *p = tb->tc.ptr + tb->tc.size; + const uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; #ifdef CONFIG_PROFILER TCGProfile *prof = &tcg_ctx->prof; @@ -392,27 +393,18 @@ void tb_destroy(TranslationBlock *tb) bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) { - TranslationBlock *tb; - bool r = false; - uintptr_t check_offset; - - /* The host_pc has to be in the region of current code buffer. If - * it is not we will not be able to resolve it here. The two cases - * where host_pc will not be correct are: + /* + * The host_pc has to be in the rx region of the code buffer. + * If it is not we will not be able to resolve it here. + * The two cases where host_pc will not be correct are: * * - fault during translation (instruction fetch) * - fault from helper (not using GETPC() macro) * * Either way we need return early as we can't resolve it here. - * - * We are using unsigned arithmetic so if host_pc < - * tcg_init_ctx.code_gen_buffer check_offset will wrap to way - * above the code_gen_buffer_size */ - check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer; - - if (check_offset < tcg_init_ctx.code_gen_buffer_size) { - tb = tcg_tb_lookup(host_pc); + if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { + TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); if (tb_cflags(tb) & CF_NOCACHE) { @@ -421,11 +413,10 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) tcg_tb_remove(tb); tb_destroy(tb); } - r = true; + return true; } } - - return r; + return false; } static void page_init(void) @@ -973,7 +964,7 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) -static inline size_t size_code_gen_buffer(size_t tb_size) +static size_t size_code_gen_buffer(size_t tb_size) { /* Size the buffer. */ if (tb_size == 0) { @@ -1024,22 +1015,27 @@ static inline void *split_cross_256mb(void *buf1, size_t size1) static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned(CODE_GEN_ALIGN))); -static inline void *alloc_code_gen_buffer(void) +static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) { - void *buf = static_code_gen_buffer; - void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer); + void *buf, *end; size_t size; + if (splitwx > 0) { + error_setg(errp, "jit split-wx not supported"); + return false; + } + /* page-align the beginning and end of the buffer */ + buf = static_code_gen_buffer; + end = static_code_gen_buffer + sizeof(static_code_gen_buffer); buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); size = end - buf; /* Honor a command-line option limiting the size of the buffer. 
*/ - if (size > tcg_ctx->code_gen_buffer_size) { - size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size, - qemu_real_host_page_size); + if (size > tb_size) { + size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); } tcg_ctx->code_gen_buffer_size = size; @@ -1051,31 +1047,49 @@ static inline void *alloc_code_gen_buffer(void) #endif if (qemu_mprotect_rwx(buf, size)) { - abort(); + error_setg_errno(errp, errno, "mprotect of jit buffer"); + return false; } qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - return buf; + tcg_ctx->code_gen_buffer = buf; + return true; } #elif defined(_WIN32) -static inline void *alloc_code_gen_buffer(void) +static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) { - size_t size = tcg_ctx->code_gen_buffer_size; - return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, - PAGE_EXECUTE_READWRITE); + void *buf; + + if (splitwx > 0) { + error_setg(errp, "jit split-wx not supported"); + return false; + } + + buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + if (buf == NULL) { + error_setg_win32(errp, GetLastError(), + "allocate %zu bytes for jit buffer", size); + return false; + } + + tcg_ctx->code_gen_buffer = buf; + tcg_ctx->code_gen_buffer_size = size; + return true; } #else -static inline void *alloc_code_gen_buffer(void) +static bool alloc_code_gen_buffer_anon(size_t size, int prot, + int flags, Error **errp) { - int prot = PROT_WRITE | PROT_READ | PROT_EXEC; - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - size_t size = tcg_ctx->code_gen_buffer_size; void *buf; buf = mmap(NULL, size, prot, flags, -1, 0); if (buf == MAP_FAILED) { - return NULL; + error_setg_errno(errp, errno, + "allocate %zu bytes for jit buffer", size); + return false; } + tcg_ctx->code_gen_buffer_size = size; #ifdef __mips__ if (cross_256mb(buf, size)) { @@ -1114,19 +1128,183 @@ static inline void *alloc_code_gen_buffer(void) /* Request large pages for the buffer. */ qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - return buf; + tcg_ctx->code_gen_buffer = buf; + return true; } -#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ -static inline void code_gen_alloc(size_t tb_size) +#ifndef CONFIG_TCG_INTERPRETER +#ifdef CONFIG_POSIX +#include "qemu/memfd.h" + +static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) { - tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size); - tcg_ctx->code_gen_buffer = alloc_code_gen_buffer(); - if (tcg_ctx->code_gen_buffer == NULL) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); + void *buf_rw = NULL, *buf_rx = MAP_FAILED; + int fd = -1; + +#ifdef __mips__ + /* Find space for the RX mapping, vs the 256MiB regions. */ + if (!alloc_code_gen_buffer_anon(size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | + MAP_NORESERVE, errp)) { + return false; + } + /* The size of the mapping may have been adjusted. */ + size = tcg_ctx->code_gen_buffer_size; + buf_rx = tcg_ctx->code_gen_buffer; +#endif + + buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); + if (buf_rw == NULL) { + goto fail; + } + +#ifdef __mips__ + void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC, + MAP_SHARED | MAP_FIXED, fd, 0); + if (tmp != buf_rx) { + goto fail_rx; + } +#else + buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + if (buf_rx == MAP_FAILED) { + goto fail_rx; + } +#endif + + close(fd); + tcg_ctx->code_gen_buffer = buf_rw; + tcg_ctx->code_gen_buffer_size = size; + tcg_splitwx_diff = buf_rx - buf_rw; + + /* Request large pages for the buffer and the splitwx. 
*/ + qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE); + qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE); + return true; + + fail_rx: + error_setg_errno(errp, errno, "failed to map shared memory for execute"); + fail: + if (buf_rx != MAP_FAILED) { + munmap(buf_rx, size); } + if (buf_rw) { + munmap(buf_rw, size); + } + if (fd >= 0) { + close(fd); + } + return false; } +#endif /* CONFIG_POSIX */ + +#ifdef CONFIG_DARWIN +#include <mach/mach.h> + +extern kern_return_t mach_vm_remap(vm_map_t target_task, + mach_vm_address_t *target_address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_task, + mach_vm_address_t src_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance); + +static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) +{ + kern_return_t ret; + mach_vm_address_t buf_rw, buf_rx; + vm_prot_t cur_prot, max_prot; + + /* Map the read-write portion via normal anon memory. */ + if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, errp)) { + return false; + } + + buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer; + buf_rx = 0; + ret = mach_vm_remap(mach_task_self(), + &buf_rx, + size, + 0, + VM_FLAGS_ANYWHERE, + mach_task_self(), + buf_rw, + false, + &cur_prot, + &max_prot, + VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + /* TODO: Convert "ret" to a human readable error message. */ + error_setg(errp, "vm_remap for jit splitwx failed"); + munmap((void *)buf_rw, size); + return false; + } + + if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { + error_setg_errno(errp, errno, "mprotect for jit splitwx"); + munmap((void *)buf_rx, size); + munmap((void *)buf_rw, size); + return false; + } + + tcg_splitwx_diff = buf_rx - buf_rw; + return true; +} +#endif /* CONFIG_DARWIN */ +#endif /* CONFIG_TCG_INTERPRETER */ + +static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp) +{ +#ifndef CONFIG_TCG_INTERPRETER +# ifdef CONFIG_DARWIN + return alloc_code_gen_buffer_splitwx_vmremap(size, errp); +# endif +# ifdef CONFIG_POSIX + return alloc_code_gen_buffer_splitwx_memfd(size, errp); +# endif +#endif + error_setg(errp, "jit split-wx not supported"); + return false; +} + +static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) +{ + ERRP_GUARD(); + int prot, flags; + + if (splitwx) { + if (alloc_code_gen_buffer_splitwx(size, errp)) { + return true; + } + /* + * If splitwx force-on (1), fail; + * if splitwx default-on (-1), fall through to splitwx off. + */ + if (splitwx > 0) { + return false; + } + error_free_or_abort(errp); + } + + prot = PROT_READ | PROT_WRITE | PROT_EXEC; + flags = MAP_PRIVATE | MAP_ANONYMOUS; +#ifdef CONFIG_TCG_INTERPRETER + /* The tcg interpreter does not need execute permission. */ + prot = PROT_READ | PROT_WRITE; +#elif defined(CONFIG_DARWIN) + /* Applicable to both iOS and macOS (Apple Silicon). */ + if (!splitwx) { + flags |= MAP_JIT; + } +#endif + + return alloc_code_gen_buffer_anon(size, prot, flags, errp); +} +#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ static bool tb_cmp(const void *ap, const void *bp) { @@ -1152,13 +1330,19 @@ static void tb_htable_init(void) /* Must be called before using the QEMU cpus. 'tb_size' is the size (in bytes) allocated to the translation buffer. Zero means default size. 
*/ -void tcg_exec_init(unsigned long tb_size) +void tcg_exec_init(unsigned long tb_size, int splitwx) { + bool ok; + tcg_allowed = true; cpu_gen_init(); page_init(); tb_htable_init(); - code_gen_alloc(tb_size); + + ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size), + splitwx, &error_fatal); + assert(ok); + #if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and initialize the prologue now. */ @@ -1722,7 +1906,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } gen_code_buf = tcg_ctx->code_gen_ptr; - tb->tc.ptr = gen_code_buf; + tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); tb->pc = pc; tb->cs_base = cs_base; tb->flags = flags; @@ -1816,15 +2000,19 @@ TranslationBlock *tb_gen_code(CPUState *cpu, if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && qemu_log_in_addr_range(tb->pc)) { FILE *logfile = qemu_log_lock(); - int code_size, data_size = 0; + int code_size, data_size; + const tcg_target_ulong *rx_data_gen_ptr; size_t chunk_start; int insn = 0; if (tcg_ctx->data_gen_ptr) { - code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; + rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); + code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; data_size = gen_code_size - code_size; } else { + rx_data_gen_ptr = 0; code_size = gen_code_size; + data_size = 0; } /* Dump header and the first instruction */ @@ -1859,16 +2047,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, if (data_size) { int i; qemu_log(" data: [size=%d]\n", data_size); - for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { - if (sizeof(tcg_target_ulong) == 8) { - qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", - (uintptr_t)tcg_ctx->data_gen_ptr + i, - *(uint64_t *)(tcg_ctx->data_gen_ptr + i)); - } else { - qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", - (uintptr_t)tcg_ctx->data_gen_ptr + i, - *(uint32_t *)(tcg_ctx->data_gen_ptr + i)); - } + for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { + qemu_log("0x%08" PRIxPTR ": .quad 0x%" TCG_PRIlx "\n", + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); } } qemu_log("\n"); diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index fb1e19c585..a49a794065 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -133,8 +133,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, } /* The disas_log hook may use these values rather than recompute. */ - db->tb->size = db->pc_next - db->pc_first; - db->tb->icount = db->num_insns; + tb->size = db->pc_next - db->pc_first; + tb->icount = db->num_insns; #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) diff --git a/bsd-user/main.c b/bsd-user/main.c index 9c700c6234..65163e1396 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -909,7 +909,7 @@ int main(int argc, char **argv) } /* init tcg before creating CPUs and to get qemu_host_page_size */ - tcg_exec_init(0); + tcg_exec_init(0, false); cpu_type = parse_cpu_option(cpu_model); cpu = cpu_create(cpu_type); @@ -299,7 +299,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size) } /* Disassemble this for me please... (debugging). */ -void disas(FILE *out, void *code, unsigned long size) +void disas(FILE *out, const void *code, unsigned long size) { uintptr_t pc; int count; diff --git a/disas/capstone.c b/disas/capstone.c index 7462c0e305..20bc8f9669 100644 --- a/disas/capstone.c +++ b/disas/capstone.c @@ -229,7 +229,7 @@ bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size) } /* Disassemble SIZE bytes at CODE for the host. 
*/ -bool cap_disas_host(disassemble_info *info, void *code, size_t size) +bool cap_disas_host(disassemble_info *info, const void *code, size_t size) { csh handle; const uint8_t *cbuf; diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h index 2164762b46..d1133a4e04 100644 --- a/include/disas/dis-asm.h +++ b/include/disas/dis-asm.h @@ -358,7 +358,7 @@ typedef struct disassemble_info { (bfd_vma addr, struct disassemble_info * info); /* These are for buffer_read_memory. */ - bfd_byte *buffer; + const bfd_byte *buffer; bfd_vma buffer_vma; int buffer_length; @@ -462,7 +462,7 @@ int print_insn_rx(bfd_vma, disassemble_info *); #ifdef CONFIG_CAPSTONE bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size); -bool cap_disas_host(disassemble_info *info, void *code, size_t size); +bool cap_disas_host(disassemble_info *info, const void *code, size_t size); bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count); bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size); #else diff --git a/include/disas/disas.h b/include/disas/disas.h index 36c33f6f19..d363e95ede 100644 --- a/include/disas/disas.h +++ b/include/disas/disas.h @@ -7,7 +7,7 @@ #include "cpu.h" /* Disassemble this for me please... (debugging). */ -void disas(FILE *out, void *code, unsigned long size); +void disas(FILE *out, const void *code, unsigned long size); void target_disas(FILE *out, CPUState *cpu, target_ulong code, target_ulong size); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index fab573da06..2e5b4bba48 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -448,7 +448,7 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, * Note: the address of search data can be obtained by adding @size to @ptr. */ struct tb_tc { - void *ptr; /* pointer to the translated code */ + const void *ptr; /* pointer to the translated code */ size_t size; }; diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 822c43cfd3..aa4b44354a 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -32,7 +32,7 @@ static inline void gen_io_end(void) tcg_temp_free_i32(tmp); } -static inline void gen_tb_start(TranslationBlock *tb) +static inline void gen_tb_start(const TranslationBlock *tb) { TCGv_i32 count, imm; @@ -71,7 +71,7 @@ static inline void gen_tb_start(TranslationBlock *tb) tcg_temp_free_i32(count); } -static inline void gen_tb_end(TranslationBlock *tb, int num_insns) +static inline void gen_tb_end(const TranslationBlock *tb, int num_insns) { if (tb_cflags(tb) & CF_USE_ICOUNT) { /* Update the num_insn immediate parameter now that we know diff --git a/include/exec/log.h b/include/exec/log.h index e02fff5de1..3c7fa65ead 100644 --- a/include/exec/log.h +++ b/include/exec/log.h @@ -56,7 +56,7 @@ static inline void log_target_disas(CPUState *cpu, target_ulong start, rcu_read_unlock(); } -static inline void log_disas(void *code, unsigned long size) +static inline void log_disas(const void *code, unsigned long size) { QemuLogFile *logfile; rcu_read_lock(); diff --git a/include/exec/translator.h b/include/exec/translator.h index 638e1529c5..24232ead41 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -67,7 +67,7 @@ typedef enum DisasJumpType { * Architecture-agnostic disassembly context. 
*/ typedef struct DisasContextBase { - TranslationBlock *tb; + const TranslationBlock *tb; target_ulong pc_first; target_ulong pc_next; DisasJumpType is_jmp; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 8e7552910d..140fa32a5e 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -189,7 +189,8 @@ struct CPUClass { void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, Error **errp); void (*set_pc)(CPUState *cpu, vaddr value); - void (*synchronize_from_tb)(CPUState *cpu, struct TranslationBlock *tb); + void (*synchronize_from_tb)(CPUState *cpu, + const struct TranslationBlock *tb); bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h index 58ae488491..ae20bcda73 100644 --- a/include/qemu/cacheflush.h +++ b/include/qemu/cacheflush.h @@ -8,16 +8,27 @@ #ifndef QEMU_CACHEFLUSH_H #define QEMU_CACHEFLUSH_H +/** + * flush_idcache_range: + * @rx: instruction address + * @rw: data address + * @len: length to flush + * + * Flush @len bytes of the data cache at @rw and the icache at @rx + * to bring them in sync. The two addresses may be different virtual + * mappings of the same physical page(s). + */ + #if defined(__i386__) || defined(__x86_64__) || defined(__s390__) -static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { /* icache is coherent and does not require flushing. */ } #else -void flush_icache_range(uintptr_t start, uintptr_t stop); +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len); #endif diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h index d9d3ca8559..00349fb18a 100644 --- a/include/sysemu/tcg.h +++ b/include/sysemu/tcg.h @@ -8,7 +8,8 @@ #ifndef SYSEMU_TCG_H #define SYSEMU_TCG_H -void tcg_exec_init(unsigned long tb_size); +void tcg_exec_init(unsigned long tb_size, int splitwx); + #ifdef CONFIG_TCG extern bool tcg_allowed; #define tcg_enabled() (tcg_allowed) diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 5b3bdacc39..901b19f32a 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -805,7 +805,7 @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, * be NULL and @idx should be 0. Otherwise, @tb should be valid and * @idx should be one of the TB_EXIT_ values. */ -void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx); +void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx); /** * tcg_gen_goto_tb() - output goto_tb TCG operation diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 67092e82c6..70a76646c4 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -211,6 +211,11 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +/* Only used by i386 to cope with stupid register constraints. */ +DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1, + TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | + IMPL(TCG_TARGET_HAS_qemu_st8_i32)) + /* Host vector support. 
*/ #define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 8ff9dad4ef..95fe5604eb 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -261,7 +261,7 @@ struct TCGLabel { unsigned refs : 16; union { uintptr_t value; - tcg_insn_unit *value_ptr; + const tcg_insn_unit *value_ptr; } u; QSIMPLEQ_HEAD(, TCGRelocation) relocs; QSIMPLEQ_ENTRY(TCGLabel) next; @@ -621,8 +621,6 @@ struct TCGContext { here, because there's too much arithmetic throughout that relies on addition and subtraction working on bytes. Rely on the GCC extension that allows arithmetic on void*. */ - void *code_gen_prologue; - void *code_gen_epilogue; void *code_gen_buffer; size_t code_gen_buffer_size; void *code_gen_ptr; @@ -679,8 +677,36 @@ struct TCGContext { extern TCGContext tcg_init_ctx; extern __thread TCGContext *tcg_ctx; +extern const void *tcg_code_gen_epilogue; +extern uintptr_t tcg_splitwx_diff; extern TCGv_env cpu_env; +static inline bool in_code_gen_buffer(const void *p) +{ + const TCGContext *s = &tcg_init_ctx; + /* + * Much like it is valid to have a pointer to the byte past the + * end of an array (so long as you don't dereference it), allow + * a pointer to the byte past the end of the code gen buffer. + */ + return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size; +} + +#ifdef CONFIG_DEBUG_TCG +const void *tcg_splitwx_to_rx(void *rw); +void *tcg_splitwx_to_rw(const void *rx); +#else +static inline const void *tcg_splitwx_to_rx(void *rw) +{ + return rw ? rw + tcg_splitwx_diff : NULL; +} + +static inline void *tcg_splitwx_to_rw(const void *rx) +{ + return rx ? (void *)rx - tcg_splitwx_diff : NULL; +} +#endif + static inline size_t temp_idx(TCGTemp *ts) { ptrdiff_t n = ts - tcg_ctx->temps; @@ -1101,7 +1127,7 @@ static inline TCGLabel *arg_label(TCGArg i) * correct result. */ -static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) +static inline ptrdiff_t tcg_ptr_byte_diff(const void *a, const void *b) { return a - b; } @@ -1115,9 +1141,22 @@ static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) * to the destination address. */ -static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target) +static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, const void *target) +{ + return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_ptr)); +} + +/** + * tcg_tbrel_diff + * @s: the tcg context + * @target: address of the target + * + * Produce a difference, from the beginning of the current TB code + * to the destination address. 
+ */ +static inline ptrdiff_t tcg_tbrel_diff(TCGContext *s, const void *target) { - return tcg_ptr_byte_diff(target, s->code_ptr); + return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_buf)); } /** @@ -1222,14 +1261,14 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_IDXMAX 1 #define TB_EXIT_REQUESTED 3 -#ifdef HAVE_TCG_QEMU_TB_EXEC -uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); +#ifdef CONFIG_TCG_INTERPRETER +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *tb_ptr); #else -# define tcg_qemu_tb_exec(env, tb_ptr) \ - ((uintptr_t (*)(void *, void *))tcg_ctx->code_gen_prologue)(env, tb_ptr) +typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr); +extern tcg_prologue_fn *tcg_qemu_tb_exec; #endif -void tcg_register_jit(void *buf, size_t buf_size); +void tcg_register_jit(const void *buf, size_t buf_size); #if TCG_TARGET_MAYBE_vec /* Return zero if the tuple (opc, type, vece) is unsupportable; diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index 661b5daa9f..7193c3b226 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -748,8 +748,10 @@ IOCTL(TUNSETQUEUE, IOC_W, MK_PTR(MK_STRUCT(STRUCT_short_ifreq))) IOCTL(TUNSETIFINDEX , IOC_W, MK_PTR(TYPE_INT)) /* TUNGETFILTER is not supported: see TUNATTACHFILTER. */ +#ifdef TUNSETVNETLE IOCTL(TUNSETVNETLE, IOC_W, MK_PTR(TYPE_INT)) IOCTL(TUNGETVNETLE, IOC_R, MK_PTR(TYPE_INT)) +#endif #ifdef TUNSETVNETBE IOCTL(TUNSETVNETBE, IOC_W, MK_PTR(TYPE_INT)) IOCTL(TUNGETVNETBE, IOC_R, MK_PTR(TYPE_INT)) diff --git a/linux-user/main.c b/linux-user/main.c index 750a01118f..bb4e55e8fc 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -701,7 +701,7 @@ int main(int argc, char **argv, char **envp) cpu_type = parse_cpu_option(cpu_model); /* init tcg before creating CPUs and to get qemu_host_page_size */ - tcg_exec_init(0); + tcg_exec_init(0, false); cpu = cpu_create(cpu_type); env = cpu->env_ptr; diff --git a/qemu-options.hx b/qemu-options.hx index 459c916d3d..1698a0c751 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -115,6 +115,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel, " igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n" " kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n" " kvm-shadow-mem=size of KVM shadow MMU in bytes\n" + " split-wx=on|off (enable TCG split w^x mapping)\n" " tb-size=n (TCG translation block cache size)\n" " thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL) SRST @@ -140,6 +141,12 @@ SRST ``kvm-shadow-mem=size`` Defines the size of the KVM shadow MMU. + ``split-wx=on|off`` + Controls the use of split w^x mapping for the TCG code generation + buffer. Some operating systems require this to be enabled, and in + such a case this will default on. On other operating systems, this + will default off, but one may enable this for testing or debugging. + ``tb-size=n`` Controls the size (in MiB) of the TCG translation block cache. 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 8b9ffc41c2..6301f4f0a5 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -2946,7 +2946,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, invalidate_and_set_dirty(mr, addr1, l); break; case FLUSH_CACHE: - flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l); + flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l); break; } } diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d6188f6566..62e319eb6a 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -54,7 +54,8 @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value) } } -static void arm_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void arm_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 2e3fdfdf6b..ef63edfc68 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -410,7 +410,7 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest) static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) { - TranslationBlock *tb; + const TranslationBlock *tb; tb = s->base.tb; if (use_goto_tb(s, n, dest)) { diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 5d9c4ad5bf..6f3d5a9e4a 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -41,7 +41,8 @@ static bool avr_cpu_has_work(CPUState *cs) && cpu_interrupts_enabled(env); } -static void avr_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void avr_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { AVRCPU *cpu = AVR_CPU(cs); CPUAVRState *env = &cpu->env; diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 71b6aca45d..e28f047d10 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -35,7 +35,8 @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.iaoq_b = value + 4; } -static void hppa_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void hppa_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { HPPACPU *cpu = HPPA_CPU(cs); diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 628dd29fe7..4fa013720e 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -49,7 +49,8 @@ static void x86_cpu_exec_exit(CPUState *cs) env->eflags = cpu_compute_eflags(env); } -static void x86_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void x86_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { X86CPU *cpu = X86_CPU(cs); diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 9b2482159d..c8e754cfb1 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -83,7 +83,8 @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.iflags = 0; } -static void mb_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void mb_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 2283214c87..b2864d67d7 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -47,7 +47,8 @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value) } } -static void mips_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void mips_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; diff --git 
a/target/riscv/cpu.c b/target/riscv/cpu.c index 254cd83f8b..8227d7aea9 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -314,7 +314,8 @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value) env->pc = value; } -static void riscv_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void riscv_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 23ee17a701..2bb14144a7 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -33,7 +33,8 @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.pc = value; } -static void rx_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void rx_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { RXCPU *cpu = RX_CPU(cs); diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index 3c68021c56..1e0f05a15b 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -34,7 +34,8 @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.pc = value; } -static void superh_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void superh_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { SuperHCPU *cpu = SUPERH_CPU(cs); diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index d8a8bb1dda..6f14e370ed 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -691,7 +691,8 @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.npc = value + 4; } -static void sparc_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static void sparc_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) { SPARCCPU *cpu = SPARC_CPU(cs); diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 2f2e5b029f..4bff1d4718 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -42,7 +42,7 @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value) } static void tricore_cpu_synchronize_from_tb(CPUState *cs, - TranslationBlock *tb) + const TranslationBlock *tb) { TriCoreCPU *cpu = TRICORE_CPU(cs); CPUTriCoreState *env = &cpu->env; diff --git a/tcg/README b/tcg/README index 2f051e5c97..0cf9e2727c 100644 --- a/tcg/README +++ b/tcg/README @@ -502,6 +502,7 @@ goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). * qemu_ld_i32/i64 t0, t1, flags, memidx * qemu_st_i32/i64 t0, t1, flags, memidx +* qemu_st8_i32 t0, t1, flags, memidx Load data at the guest address t1 into t0, or store data in t0 at guest address t1. The _i32/_i64 size applies to the size of the input/output @@ -518,6 +519,10 @@ of the memory access. For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a 64-bit memory access specified in flags. +For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of +the memory operation is known to be 8-bit. This allows the backend to +provide a different set of register constraints. + ********* Host vector operations All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE. 
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 26f71cb599..ab199b143f 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -78,38 +78,42 @@ static const int tcg_target_call_oarg_regs[1] = { #define TCG_REG_GUEST_BASE TCG_REG_X28 #endif -static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t offset = target - code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t offset = target - src_rx; + if (offset == sextract64(offset, 0, 26)) { /* read instruction, mask away previous PC_REL26 parameter contents, set the proper offset, then write back the instruction. */ - *code_ptr = deposit32(*code_ptr, 0, 26, offset); + *src_rw = deposit32(*src_rw, 0, 26, offset); return true; } return false; } -static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t offset = target - code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t offset = target - src_rx; + if (offset == sextract64(offset, 0, 19)) { - *code_ptr = deposit32(*code_ptr, 5, 19, offset); + *src_rw = deposit32(*src_rw, 5, 19, offset); return true; } return false; } -static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) { tcg_debug_assert(addend == 0); switch (type) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: - return reloc_pc26(code_ptr, (tcg_insn_unit *)value); + return reloc_pc26(code_ptr, (const tcg_insn_unit *)value); case R_AARCH64_CONDBR19: - return reloc_pc19(code_ptr, (tcg_insn_unit *)value); + return reloc_pc19(code_ptr, (const tcg_insn_unit *)value); default: g_assert_not_reached(); } @@ -1050,12 +1054,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, /* Look for host pointer values within 4G of the PC. This happens often when loading pointers to QEMU's own data structures. 
*/ if (type == TCG_TYPE_I64) { - tcg_target_long disp = value - (intptr_t)s->code_ptr; + intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr); + tcg_target_long disp = value - src_rx; if (disp == sextract64(disp, 0, 21)) { tcg_out_insn(s, 3406, ADR, rd, disp); return; } - disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); + disp = (value >> 12) - (src_rx >> 12); if (disp == sextract64(disp, 0, 21)) { tcg_out_insn(s, 3406, ADRP, rd, disp); if (value & 0xfff) { @@ -1306,18 +1311,18 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, } } -static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) { - ptrdiff_t offset = target - s->code_ptr; + ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; tcg_debug_assert(offset == sextract64(offset, 0, 26)); tcg_out_insn(s, 3206, B, offset); } -static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) +static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) { - ptrdiff_t offset = target - s->code_ptr; + ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; if (offset == sextract64(offset, 0, 26)) { - tcg_out_insn(s, 3206, BL, offset); + tcg_out_insn(s, 3206, B, offset); } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); tcg_out_insn(s, 3207, BR, TCG_REG_TMP); @@ -1329,9 +1334,9 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg reg) tcg_out_insn(s, 3207, BLR, reg); } -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) { - ptrdiff_t offset = target - s->code_ptr; + ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; if (offset == sextract64(offset, 0, 26)) { tcg_out_insn(s, 3206, BL, offset); } else { @@ -1340,21 +1345,21 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) } } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, - uintptr_t addr) +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { tcg_insn_unit i1, i2; TCGType rt = TCG_TYPE_I64; TCGReg rd = TCG_REG_TMP; uint64_t pair; - ptrdiff_t offset = addr - jmp_addr; + ptrdiff_t offset = addr - jmp_rx; if (offset == sextract64(offset, 0, 26)) { i1 = I3206_B | ((offset >> 2) & 0x3ffffff); i2 = NOP; } else { - offset = (addr >> 12) - (jmp_addr >> 12); + offset = (addr >> 12) - (jmp_rx >> 12); /* patch ADRP */ i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; @@ -1362,8 +1367,8 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; } pair = (uint64_t)i2 << 32 | i1; - qatomic_set((uint64_t *)jmp_addr, pair); - flush_icache_range(jmp_addr, jmp_addr + 8); + qatomic_set((uint64_t *)jmp_rw, pair); + flush_idcache_range(jmp_rx, jmp_rw, 8); } static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) @@ -1393,7 +1398,7 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); offset = tcg_in32(s) >> 5; } else { - offset = l->u.value_ptr - s->code_ptr; + offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2; tcg_debug_assert(offset == sextract64(offset, 0, 19)); } @@ -1568,7 +1573,7 @@ static void * const qemu_st_helpers[16] = { [MO_BEQ] = helper_be_stq_mmu, }; -static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) +static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const 
void *target) { ptrdiff_t offset = tcg_pcrel_diff(s, target); tcg_debug_assert(offset == sextract64(offset, 0, 21)); @@ -1581,7 +1586,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); MemOp size = opc & MO_SIZE; - if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1606,7 +1611,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); MemOp size = opc & MO_SIZE; - if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1631,7 +1636,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->type = ext; label->datalo_reg = data_reg; label->addrlo_reg = addr_reg; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr; } @@ -1849,7 +1854,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, #endif /* CONFIG_SOFTMMU */ } -static tcg_insn_unit *tb_ret_addr; +static const tcg_insn_unit *tb_ret_addr; static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1873,7 +1878,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_exit_tb: /* Reuse the zeroing that exists for goto_ptr. */ if (a0 == 0) { - tcg_out_goto_long(s, s->code_gen_epilogue); + tcg_out_goto_long(s, tcg_code_gen_epilogue); } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); tcg_out_goto_long(s, tb_ret_addr); @@ -2894,11 +2899,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) * Return path for goto_ptr. Set return value to 0, a-la exit_tb, * and fall through to the rest of the epilogue. */ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0); /* TB epilogue */ - tb_ret_addr = s->code_ptr; + tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); /* Remove TCG locals stack space. 
*/ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, @@ -2964,7 +2969,7 @@ static const DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 8a6b97598e..5ec30dba25 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -88,6 +88,7 @@ typedef enum { #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_goto_ptr 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 @@ -148,7 +149,7 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 62c37a954b..0fd1126454 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -187,29 +187,32 @@ static const uint8_t tcg_cond_to_arm_cond[] = { [TCG_COND_GTU] = COND_HI, }; -static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) >> 2; + if (offset == sextract32(offset, 0, 24)) { - *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); + *src_rw = deposit32(*src_rw, 0, 24, offset); return true; } return false; } -static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8; if (offset >= -0xfff && offset <= 0xfff) { - tcg_insn_unit insn = *code_ptr; + tcg_insn_unit insn = *src_rw; bool u = (offset >= 0); if (!u) { offset = -offset; } insn = deposit32(insn, 23, 1, u); insn = deposit32(insn, 0, 12, offset); - *code_ptr = insn; + *src_rw = insn; return true; } return false; @@ -221,9 +224,9 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, tcg_debug_assert(addend == 0); if (type == R_ARM_PC24) { - return reloc_pc24(code_ptr, (tcg_insn_unit *)value); + return reloc_pc24(code_ptr, (const tcg_insn_unit *)value); } else if (type == R_ARM_PC13) { - return reloc_pc13(code_ptr, (tcg_insn_unit *)value); + return reloc_pc13(code_ptr, (const tcg_insn_unit *)value); } else { g_assert_not_reached(); } @@ -617,7 +620,7 @@ static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) /* Check for a pc-relative address. This will usually be the TB, or within the TB, which is immediately before the code block. */ - diff = arg - ((intptr_t)s->code_ptr + 8); + diff = tcg_pcrel_diff(s, (void *)arg) - 8; if (diff >= 0) { rot = encode_imm(diff); if (rot >= 0) { @@ -1019,7 +1022,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond, * with the code buffer limited to 16MB we wouldn't need the long case. * But we also use it for the tail-call to the qemu_ld/st helpers, which does. 
*/ -static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +static void tcg_out_goto(TCGContext *s, int cond, const tcg_insn_unit *addr) { intptr_t addri = (intptr_t)addr; ptrdiff_t disp = tcg_pcrel_diff(s, addr); @@ -1033,7 +1036,7 @@ static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) /* The call case is mostly used for helpers - so it's not unreasonable * for them to be beyond branch range */ -static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) { intptr_t addri = (intptr_t)addr; ptrdiff_t disp = tcg_pcrel_diff(s, addr); @@ -1337,7 +1340,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->datahi_reg = datahi; label->addrlo_reg = addrlo; label->addrhi_reg = addrhi; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr; } @@ -1348,7 +1351,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); void *func; - if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1411,7 +1414,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGMemOpIdx oi = lb->oi; MemOp opc = get_memop(oi); - if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1762,8 +1765,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGReg base = TCG_REG_PC; tcg_debug_assert(s->tb_jmp_insn_offset == 0); - ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]); - dif = ptr - ((intptr_t)s->code_ptr + 8); + ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); + dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); if (dif != dil) { /* The TB is close, but outside the 12 bits addressable by @@ -2297,7 +2300,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) * Return path for goto_ptr. Set return value to 0, a-la exit_tb, * and fall through to the rest of the epilogue. 
*/ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); tcg_out_epilogue(s); } @@ -2353,7 +2356,7 @@ static const DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index f1955ce4ac..8d1fee6327 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -126,6 +126,7 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_direct_jump 0 +#define TCG_TARGET_HAS_qemu_st8_i32 0 enum { TCG_AREG0 = TCG_REG_R6, @@ -135,7 +136,7 @@ enum { #define TCG_TARGET_HAS_MEMORY_BSWAP 1 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d8797ed398..46e856f442 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -154,18 +154,17 @@ bool have_bmi1; bool have_popcnt; bool have_avx1; bool have_avx2; +bool have_movbe; #ifdef CONFIG_CPUID_H -static bool have_movbe; static bool have_bmi2; static bool have_lzcnt; #else -# define have_movbe 0 # define have_bmi2 0 # define have_lzcnt 0 #endif -static tcg_insn_unit *tb_ret_addr; +static const tcg_insn_unit *tb_ret_addr; static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) @@ -173,7 +172,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, value += addend; switch(type) { case R_386_PC32: - value -= (uintptr_t)code_ptr; + value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr); if (value != (int32_t)value) { return false; } @@ -182,7 +181,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, tcg_patch32(code_ptr, value); break; case R_386_PC8: - value -= (uintptr_t)code_ptr; + value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr); if (value != (int8_t)value) { return false; } @@ -246,11 +245,21 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, ct->regs |= ALL_VECTOR_REGS; break; - /* qemu_ld/st address constraint */ case 'L': + /* qemu_ld/st data+address constraint */ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff; +#ifdef CONFIG_SOFTMMU tcg_regset_reset_reg(ct->regs, TCG_REG_L0); tcg_regset_reset_reg(ct->regs, TCG_REG_L1); +#endif + break; + case 's': + /* qemu_st8_i32 data constraint */ + ct->regs = 0xf; +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->regs, TCG_REG_L1); +#endif break; case 'e': @@ -1006,7 +1015,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* Try a 7 byte pc-relative lea before the 10 byte movq. 
*/ - diff = arg - ((uintptr_t)s->code_ptr + 7); + diff = tcg_pcrel_diff(s, (const void *)arg) - 7; if (diff == (int32_t)diff) { tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); @@ -1452,7 +1461,7 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, default: tcg_abort(); } - tcg_out_label(s, label_next, s->code_ptr); + tcg_out_label(s, label_next); } #endif @@ -1494,10 +1503,10 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); tcg_out_jxx(s, JCC_JMP, label_over, 1); - tcg_out_label(s, label_true, s->code_ptr); + tcg_out_label(s, label_true); tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over, s->code_ptr); + tcg_out_label(s, label_over); } else { /* When the destination does not overlap one of the arguments, clear the destination first, jump if cond false, and emit an @@ -1511,7 +1520,7 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, tcg_out_brcond2(s, new_args, const_args+1, 1); tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over, s->code_ptr); + tcg_out_label(s, label_over); } } #endif @@ -1525,7 +1534,7 @@ static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, TCGLabel *over = gen_new_label(); tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); tcg_out_mov(s, TCG_TYPE_I32, dest, v1); - tcg_out_label(s, over, s->code_ptr); + tcg_out_label(s, over); } } @@ -1591,7 +1600,7 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, } } -static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) +static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; @@ -1610,12 +1619,12 @@ static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) } } -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) { tcg_out_branch(s, 1, dest); } -static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) +static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest) { tcg_out_branch(s, 0, dest); } @@ -1786,7 +1795,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64, label->datahi_reg = datahi; label->addrlo_reg = addrlo; label->addrhi_reg = addrhi; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr[0]; if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { label->label_ptr[1] = label_ptr[1]; @@ -1986,13 +1995,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg base, int index, intptr_t ofs, int seg, bool is64, MemOp memop) { - const MemOp real_bswap = memop & MO_BSWAP; - MemOp bswap = real_bswap; + bool use_movbe = false; int rexw = is64 * P_REXW; int movop = OPC_MOVL_GvEv; - if (have_movbe && real_bswap) { - bswap = 0; + /* Do big-endian loads with movbe. */ + if (memop & MO_BSWAP) { + tcg_debug_assert(have_movbe); + use_movbe = true; movop = OPC_MOVBE_GyMy; } @@ -2006,23 +2016,28 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, base, index, 0, ofs); break; case MO_UW: - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - if (real_bswap) { - tcg_out_rolw_8(s, datalo); - } - break; - case MO_SW: - if (real_bswap) { - if (have_movbe) { + if (use_movbe) { + /* There is no extending movbe; only low 16-bits are modified. 
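That observation drives the MO_UW rework that follows: a 16-bit movbe leaves bits 16 and up of the destination untouched, so the register is either cleared up front (xor, which also breaks dependency chains) or zero-extended afterwards when the destination doubles as the base or index of the address. A rough C model of the two orderings, using the GCC/Clang byte-swap builtin purely for illustration:

#include <stdint.h>

/* Clear first, then let the 16-bit byte-swapped load fill the low bits.
 * Only legal when the destination is not still needed for addressing. */
static uint64_t ld_uw_clear_first(const uint16_t *p)
{
    uint64_t reg = 0;                       /* xor reg,reg */
    reg |= __builtin_bswap16(*p);           /* 16-bit movbe */
    return reg;
}

/* Load into the (possibly dirty) register, then zero-extend. */
static uint64_t ld_uw_extend_after(const uint16_t *p, uint64_t reg)
{
    reg = (reg & ~(uint64_t)0xffff) | __builtin_bswap16(*p);  /* movbe */
    return (uint16_t)reg;                   /* movzwl reg,reg */
}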
*/ + if (datalo != base && datalo != index) { + /* XOR breaks dependency chains. */ + tgen_arithr(s, ARITH_XOR, datalo, datalo); tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, datalo, base, index, 0, ofs); } else { - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - tcg_out_rolw_8(s, datalo); + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, + datalo, base, index, 0, ofs); + tcg_out_ext16u(s, datalo, datalo); } - tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, + base, index, 0, ofs); + } + break; + case MO_SW: + if (use_movbe) { + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, + datalo, base, index, 0, ofs); + tcg_out_ext16s(s, datalo, datalo, rexw); } else { tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg, datalo, base, index, 0, ofs); @@ -2030,18 +2045,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, break; case MO_UL: tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } break; #if TCG_TARGET_REG_BITS == 64 case MO_SL: - if (real_bswap) { - tcg_out_modrm_sib_offset(s, movop + seg, datalo, + if (use_movbe) { + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo, base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } tcg_out_ext32s(s, datalo, datalo); } else { tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, @@ -2053,12 +2062,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, if (TCG_TARGET_REG_BITS == 64) { tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, base, index, 0, ofs); - if (bswap) { - tcg_out_bswap64(s, datalo); - } } else { - if (real_bswap) { - int t = datalo; + if (use_movbe) { + TCGReg t = datalo; datalo = datahi; datahi = t; } @@ -2073,14 +2079,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); } - if (bswap) { - tcg_out_bswap32(s, datalo); - tcg_out_bswap32(s, datahi); - } } break; default: - tcg_abort(); + g_assert_not_reached(); } } @@ -2128,69 +2130,40 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg base, int index, intptr_t ofs, int seg, MemOp memop) { - /* ??? Ideally we wouldn't need a scratch register. For user-only, - we could perform the bswap twice to restore the original value - instead of moving to the scratch. But as it is, the L constraint - means that TCG_REG_L0 is definitely free here. */ - const TCGReg scratch = TCG_REG_L0; - const MemOp real_bswap = memop & MO_BSWAP; - MemOp bswap = real_bswap; + bool use_movbe = false; int movop = OPC_MOVL_EvGv; - if (have_movbe && real_bswap) { - bswap = 0; + /* + * Do big-endian stores with movbe or softmmu. + * User-only without movbe will have its swapping done generically. + */ + if (memop & MO_BSWAP) { + tcg_debug_assert(have_movbe); + use_movbe = true; movop = OPC_MOVBE_MyGy; } switch (memop & MO_SIZE) { case MO_8: - /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. - Use the scratch register if necessary. */ - if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - datalo = scratch; - } + /* This is handled with constraints on INDEX_op_qemu_st8_i32. 
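The assertion that follows is the other half of that constraint shift: rather than bouncing the byte through TCG_REG_L0 at emit time, the new opcode's 's' constraint (register set 0xf) makes the allocator deliver the data in %eax..%ebx to begin with, those being the only registers with byte sub-registers absent a REX prefix. Sketch of the mask logic, with invented names:

#include <stdint.h>

enum { REG_EAX, REG_ECX, REG_EDX, REG_EBX,
       REG_ESP, REG_EBP, REG_ESI, REG_EDI };

static int valid_byte_store_source(int reg, int target_is_64bit)
{
    /* 64-bit mode can use any register via REX; 32-bit mode only 0..3 */
    uint32_t allowed = target_is_64bit ? 0xffffu : 0xfu;
    return (allowed >> reg) & 1;
}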
*/ + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, datalo, base, index, 0, ofs); break; case MO_16: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_rolw_8(s, scratch); - datalo = scratch; - } tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo, base, index, 0, ofs); break; case MO_32: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - datalo = scratch; - } tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); break; case MO_64: if (TCG_TARGET_REG_BITS == 64) { - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); - tcg_out_bswap64(s, scratch); - datalo = scratch; - } tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, base, index, 0, ofs); - } else if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, - base, index, 0, ofs); - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, - base, index, 0, ofs + 4); } else { - if (real_bswap) { - int t = datalo; + if (use_movbe) { + TCGReg t = datalo; datalo = datahi; datahi = t; } @@ -2201,7 +2174,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } break; default: - tcg_abort(); + g_assert_not_reached(); } } @@ -2267,7 +2240,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_exit_tb: /* Reuse the zeroing that exists for goto_ptr. */ if (a0 == 0) { - tcg_out_jmp(s, s->code_gen_epilogue); + tcg_out_jmp(s, tcg_code_gen_epilogue); } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); tcg_out_jmp(s, tb_ret_addr); @@ -2280,7 +2253,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, /* jump displacement must be aligned for atomic patching; * see if we need to add extra nops before jump */ - gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4)); + gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; if (gap != 1) { tcg_out_nopn(s, gap - 1); } @@ -2520,6 +2493,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_ld(s, args, 1); break; case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st8_i32: tcg_out_qemu_st(s, args, 0); break; case INDEX_op_qemu_st_i64: @@ -2978,9 +2952,11 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } }; static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } }; static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } }; + static const TCGTargetOpDef s_L = { .args_ct_str = { "s", "L" } }; static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } }; static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } }; static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } }; + static const TCGTargetOpDef s_L_L = { .args_ct_str = { "s", "L", "L" } }; static const TCGTargetOpDef r_r_L_L = { .args_ct_str = { "r", "r", "L", "L" } }; static const TCGTargetOpDef L_L_L_L @@ -3174,6 +3150,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L; case INDEX_op_qemu_st_i32: return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L; + case INDEX_op_qemu_st8_i32: + return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 
&s_L : &s_L_L; case INDEX_op_qemu_ld_i64: return (TCG_TARGET_REG_BITS == 64 ? &r_L : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L @@ -3825,11 +3803,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) * Return path for goto_ptr. Set return value to 0, a-la exit_tb, * and fall through to the rest of the epilogue. */ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0); /* TB epilogue */ - tb_ret_addr = s->code_ptr; + tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); @@ -3998,7 +3976,7 @@ static const DebugFrame debug_frame = { #endif #if defined(ELF_HOST_MACHINE) -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index cd067e0b30..b693d3692d 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -101,6 +101,7 @@ extern bool have_bmi1; extern bool have_popcnt; extern bool have_avx1; extern bool have_avx2; +extern bool have_movbe; /* optional instructions */ #define TCG_TARGET_HAS_div2_i32 1 @@ -171,6 +172,9 @@ extern bool have_avx2; #define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 +#define TCG_TARGET_HAS_qemu_st8_i32 0 +#else +#define TCG_TARGET_HAS_qemu_st8_i32 1 #endif /* We do not support older SSE systems, only beginning with AVX1. */ @@ -206,11 +210,11 @@ extern bool have_avx2; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, - uintptr_t jmp_addr, uintptr_t addr) +static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { /* patch the branch destination */ - qatomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); + qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); /* no need to flush icache explicitly */ } @@ -225,7 +229,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 41be574e89..add157f6c3 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -139,33 +139,21 @@ static const TCGReg tcg_target_call_oarg_regs[2] = { TCG_REG_V1 }; -static tcg_insn_unit *tb_ret_addr; -static tcg_insn_unit *bswap32_addr; -static tcg_insn_unit *bswap32u_addr; -static tcg_insn_unit *bswap64_addr; +static const tcg_insn_unit *tb_ret_addr; +static const tcg_insn_unit *bswap32_addr; +static const tcg_insn_unit *bswap32u_addr; +static const tcg_insn_unit *bswap64_addr; -static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) +static bool reloc_pc16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { /* Let the compiler perform the right-shift as part of the arithmetic. 
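The goto_tb gap computation and tb_target_set_jmp_target() above are two halves of the same mechanism: the TB-linking jmp's 32-bit operand is nop-padded onto a 4-byte boundary at generation time so it can later be repointed with a single atomic store. The padding is a property of the buffer layout, identical in both views, hence the switch to plain rw pointer arithmetic; the patch itself is written through jmp_rw while the displacement is formed against jmp_rx. A self-contained sketch with C11 atomics (function names invented; QEMU uses its qatomic_set wrapper):

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

/* Nops needed so that code_ptr + pad + 1 (the rel32 field after the
 * one-byte jmp opcode) lands on a 4-byte boundary. */
static size_t jmp_pad_bytes(uintptr_t code_ptr_rw)
{
    uintptr_t operand = code_ptr_rw + 1;
    return ((operand + 3) & ~(uintptr_t)3) - operand;
}

/* Retarget a previously emitted jmp: store through the rw alias, but
 * measure the displacement from the rx alias the CPU executes at. */
static void set_jmp_target_model(uintptr_t jmp_rx, uintptr_t jmp_rw,
                                 uintptr_t addr)
{
    int32_t disp = (int32_t)(addr - (jmp_rx + 4));
    atomic_store_explicit((_Atomic int32_t *)jmp_rw, disp,
                          memory_order_relaxed);
    /* x86 keeps instruction fetch coherent with stores: no flush needed */
}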
*/ - ptrdiff_t disp = target - (pc + 1); - tcg_debug_assert(disp == (int16_t)disp); - return disp & 0xffff; -} - -static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); -} - -static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - tcg_debug_assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); - return ((uintptr_t)target >> 2) & 0x3ffffff; -} - -static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t disp = target - (src_rx + 1); + if (disp == (int16_t)disp) { + *src_rw = deposit32(*src_rw, 0, 16, disp); + return true; + } + return false; } static bool patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -173,8 +161,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, { tcg_debug_assert(type == R_MIPS_PC16); tcg_debug_assert(addend == 0); - reloc_pc16(code_ptr, (tcg_insn_unit *)value); - return true; + return reloc_pc16(code_ptr, (const tcg_insn_unit *)value); } #define TCG_CT_CONST_ZERO 0x100 @@ -516,10 +503,10 @@ static void tcg_out_opc_sa64(TCGContext *s, MIPSInsn opc1, MIPSInsn opc2, * Type jump. * Returns true if the branch was in range and the insn was emitted. */ -static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, const void *target) { uintptr_t dest = (uintptr_t)target; - uintptr_t from = (uintptr_t)s->code_ptr + 4; + uintptr_t from = (uintptr_t)tcg_splitwx_to_rx(s->code_ptr) + 4; int32_t inst; /* The pc-region branch happens within the 256MB region of @@ -631,7 +618,7 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) } } -static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub) +static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) { bool ok = tcg_out_opc_jmp(s, OPC_JAL, sub); tcg_debug_assert(ok); @@ -924,11 +911,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, } tcg_out_opc_br(s, b_opc, arg1, arg2); - if (l->has_value) { - reloc_pc16(s->code_ptr - 1, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); - } + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); tcg_out_nop(s); } @@ -1079,7 +1062,7 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } -static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { /* Note that the ABI requires the called function's address to be loaded into T9, even if a direct branch is in range. 
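reloc_pc16() above shows the split-wx pattern on a fixed-width ISA: the displacement is counted in 4-byte instruction units from the slot after the branch, measured in the rx view, and overflow is now reported to the caller instead of asserted, so code generation can be restarted rather than aborted. A compilable model, with deposit32 re-implemented for self-containment:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t deposit32(uint32_t insn, int pos, int len, uint32_t field)
{
    uint32_t mask = ((len < 32 ? (1u << len) : 0u) - 1u) << pos;
    return (insn & ~mask) | ((field << pos) & mask);
}

static bool reloc_pc16_model(uint32_t *insn_rw, const uint32_t *src_rx,
                             const uint32_t *target)
{
    ptrdiff_t disp = target - (src_rx + 1);     /* in instruction words */

    if (disp == (int16_t)disp) {
        *insn_rw = deposit32(*insn_rw, 0, 16, (uint32_t)disp);
        return true;
    }
    return false;                               /* overflow: caller retries */
}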
*/ @@ -1097,7 +1080,7 @@ static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) } } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) { tcg_out_call_int(s, arg, false); tcg_out_nop(s); @@ -1300,7 +1283,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, label->datahi_reg = datahi; label->addrlo_reg = addrlo; label->addrhi_reg = addrhi; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr[0]; if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { label->label_ptr[1] = label_ptr[1]; @@ -1309,15 +1292,17 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { + const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); TCGMemOpIdx oi = l->oi; MemOp opc = get_memop(oi); TCGReg v0; int i; /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - reloc_pc16(l->label_ptr[1], s->code_ptr); + if (!reloc_pc16(l->label_ptr[0], tgt_rx) + || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS + && !reloc_pc16(l->label_ptr[1], tgt_rx))) { + return false; } i = 1; @@ -1345,7 +1330,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - reloc_pc16(s->code_ptr - 1, l->raddr); + if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { + return false; + } /* delay slot */ if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) { @@ -1359,15 +1346,17 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { + const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); TCGMemOpIdx oi = l->oi; MemOp opc = get_memop(oi); MemOp s_bits = opc & MO_SIZE; int i; /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - reloc_pc16(l->label_ptr[1], s->code_ptr); + if (!reloc_pc16(l->label_ptr[0], tgt_rx) + || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS + && !reloc_pc16(l->label_ptr[1], tgt_rx))) { + return false; } i = 1; @@ -2483,11 +2472,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) * Return path for goto_ptr. Set return value to 0, a-la exit_tb, * and fall through to the rest of the epilogue. */ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO); /* TB epilogue */ - tb_ret_addr = s->code_ptr; + tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], TCG_REG_SP, SAVE_OFS + i * REG_SIZE); @@ -2507,7 +2496,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* * bswap32 -- 32-bit swap (signed result for mips64). a0 = abcd. */ - bswap32_addr = align_code_ptr(s); + bswap32_addr = tcg_splitwx_to_rx(align_code_ptr(s)); /* t3 = (ssss)d000 */ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP3, TCG_TMP0, 24); /* t1 = 000a */ @@ -2535,7 +2524,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* * bswap32u -- unsigned 32-bit swap. a0 = ....abcd. 
*/ - bswap32u_addr = align_code_ptr(s); + bswap32u_addr = tcg_splitwx_to_rx(align_code_ptr(s)); /* t1 = (0000)000d */ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff); /* t3 = 000a */ @@ -2561,7 +2550,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* * bswap64 -- 64-bit swap. a0 = abcdefgh */ - bswap64_addr = align_code_ptr(s); + bswap64_addr = tcg_splitwx_to_rx(align_code_ptr(s)); /* t3 = h0000000 */ tcg_out_dsll(s, TCG_TMP3, TCG_TMP0, 56); /* t1 = 0000000a */ @@ -2656,11 +2645,11 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, - uintptr_t addr) +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { - qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2)); - flush_icache_range(jmp_addr, jmp_addr + 4); + qatomic_set((uint32_t *)jmp_rw, deposit32(OPC_J, 0, 26, addr >> 2)); + flush_idcache_range(jmp_rx, jmp_rw, 4); } typedef struct { @@ -2702,7 +2691,7 @@ static const DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 92c1d63da3..c2c32fb38f 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -169,6 +169,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions @@ -201,7 +202,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/optimize.c b/tcg/optimize.c index 7ca71de956..1fb42eb2a9 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1541,6 +1541,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_st_i64: case INDEX_op_call: /* Opcodes that touch guest memory stop the optimization. 
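The flush_icache_range() to flush_idcache_range(jmp_rx, jmp_rw, 4) change in the mips hunk above is the cache-maintenance half of split-wx: stores happen at the rw alias and instruction fetch at the rx alias, so both addresses need attention when they differ. A rough model of the intent using the GCC builtin (the real helper is per-host; this is an approximation, not QEMU's implementation):

#include <stddef.h>
#include <stdint.h>

static void flush_idcache_range_model(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        /* push out the stores made through the writable alias */
        __builtin___clear_cache((char *)rw, (char *)(rw + len));
    }
    /* invalidate the instruction side at the executable alias */
    __builtin___clear_cache((char *)rx, (char *)(rx + len));
}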
*/ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 0d068ec8ab..19a4a12f15 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -62,8 +62,6 @@ #define TCG_CT_CONST_MONE 0x2000 #define TCG_CT_CONST_WSZ 0x4000 -static tcg_insn_unit *tb_ret_addr; - TCGPowerISA have_isa; static bool have_isel; bool have_altivec; @@ -184,35 +182,41 @@ static inline bool in_range_b(tcg_target_long target) return target == sextract64(target, 0, 26); } -static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) +static uint32_t reloc_pc24_val(const tcg_insn_unit *pc, + const tcg_insn_unit *target) { ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); tcg_debug_assert(in_range_b(disp)); return disp & 0x3fffffc; } -static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) +static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); + if (in_range_b(disp)) { - *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc); + *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc); return true; } return false; } -static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) +static uint16_t reloc_pc14_val(const tcg_insn_unit *pc, + const tcg_insn_unit *target) { ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); tcg_debug_assert(disp == (int16_t) disp); return disp & 0xfffc; } -static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx); + if (disp == (int16_t) disp) { - *pc = (*pc & ~0xfffc) | (disp & 0xfffc); + *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc); return true; } return false; @@ -673,12 +677,12 @@ static const uint32_t tcg_to_isel[] = { static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - tcg_insn_unit *target; + const tcg_insn_unit *target; int16_t lo; int32_t hi; value += addend; - target = (tcg_insn_unit *)value; + target = (const tcg_insn_unit *)value; switch (type) { case R_PPC_REL14: @@ -837,7 +841,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* Load addresses within the TB with one insn. */ - tb_diff = arg - (intptr_t)s->code_gen_ptr; + tb_diff = tcg_tbrel_diff(s, (void *)arg); if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) { tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff)); return; @@ -890,7 +894,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, /* Use the constant pool, if possible. 
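tcg_tbrel_diff(), introduced by this series and used in the movi hunk above, centralizes TB-relative addressing: displacements for TCG_REG_TB-based accesses are measured from the rx view of the TB start. Passing NULL yields the -tb_start bias that new_pool_label() folds into its addend, which is why the open-coded -(intptr_t)s->code_gen_ptr expressions disappear below. A sketch of the behavior these call sites imply:

#include <stdint.h>

/* Distance from the executable (rx) view of the TB start to target;
 * tbrel_diff_model(tb_start_rx, NULL) is simply -tb_start_rx. */
static inline intptr_t tbrel_diff_model(uintptr_t tb_start_rx,
                                        const void *target)
{
    return (intptr_t)((uintptr_t)target - tb_start_rx);
}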
*/ if (!in_prologue && USE_REG_TB) { new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0)); return; } @@ -940,7 +944,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, */ if (USE_REG_TB) { rel = R_PPC_ADDR16; - add = -(intptr_t)s->code_gen_ptr; + add = tcg_tbrel_diff(s, NULL); } else { rel = R_PPC_ADDR32; add = 0; @@ -1106,7 +1110,7 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) tcg_out_zori32(s, dst, src, c, XORI, XORIS); } -static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) +static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target) { ptrdiff_t disp = tcg_pcrel_diff(s, target); if (in_range_b(disp)) { @@ -1544,7 +1548,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) { if (l->has_value) { - bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr); + bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); } @@ -1722,13 +1726,13 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, insn); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, - uintptr_t addr) +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { if (TCG_TARGET_REG_BITS == 64) { tcg_insn_unit i1, i2; intptr_t tb_diff = addr - tc_ptr; - intptr_t br_diff = addr - (jmp_addr + 4); + intptr_t br_diff = addr - (jmp_rx + 4); uint64_t pair; /* This does not exercise the range of the branch, but we do @@ -1752,23 +1756,23 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, /* As per the enclosing if, this is ppc64. Avoid the _Static_assert within qatomic_set that would fail to build a ppc32 host. */ - qatomic_set__nocheck((uint64_t *)jmp_addr, pair); - flush_icache_range(jmp_addr, jmp_addr + 8); + qatomic_set__nocheck((uint64_t *)jmp_rw, pair); + flush_idcache_range(jmp_rx, jmp_rw, 8); } else { - intptr_t diff = addr - jmp_addr; + intptr_t diff = addr - jmp_rx; tcg_debug_assert(in_range_b(diff)); - qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc)); - flush_icache_range(jmp_addr, jmp_addr + 4); + qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc)); + flush_idcache_range(jmp_rx, jmp_rw, 4); } } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) { #ifdef _CALL_AIX /* Look through the descriptor. If the branch is in range, and we don't have to spend too much effort on building the toc. 
*/ - void *tgt = ((void **)target)[0]; - uintptr_t toc = ((uintptr_t *)target)[1]; + const void *tgt = ((const void * const *)target)[0]; + uintptr_t toc = ((const uintptr_t *)target)[1]; intptr_t diff = tcg_pcrel_diff(s, tgt); if (in_range_b(diff) && toc == (uint32_t)toc) { @@ -1997,7 +2001,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->datahi_reg = datahi_reg; label->addrlo_reg = addrlo_reg; label->addrhi_reg = addrhi_reg; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = lptr; } @@ -2007,7 +2011,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); TCGReg hi, lo, arg = TCG_REG_R3; - if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -2055,7 +2059,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp s_bits = opc & MO_SIZE; TCGReg hi, lo, arg = TCG_REG_R3; - if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) { + if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -2306,10 +2310,10 @@ static void tcg_target_qemu_prologue(TCGContext *s) int i; #ifdef _CALL_AIX - void **desc = (void **)s->code_ptr; - desc[0] = desc + 2; /* entry point */ - desc[1] = 0; /* environment pointer */ - s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ + const void **desc = (const void **)s->code_ptr; + desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */ + desc[1] = 0; /* environment pointer */ + s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ #endif tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, @@ -2341,7 +2345,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCCTR | BO_ALWAYS); /* Epilogue */ - s->code_gen_epilogue = tb_ret_addr = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { @@ -2362,7 +2366,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, switch (opc) { case INDEX_op_exit_tb: tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, tb_ret_addr); + tcg_out_b(s, 0, tcg_code_gen_epilogue); break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { @@ -2392,9 +2396,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, set_jmp_reset_offset(s, args[0]); if (USE_REG_TB) { /* For the unlinked case, need to reset TCG_REG_TB. 
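In the _CALL_AIX prologue above, the descriptor's entry word now holds the rx alias of the code that follows, since callers branch through the descriptor rather than to it. A sketch of the layout as these hunks use it (struct and field names invented; the second word is the TOC/environment slot, which the prologue leaves 0):

#include <stdint.h>

typedef struct {
    const void *entry;   /* rx address of the first real instruction */
    uintptr_t   aux;     /* TOC/environment word; 0 for the prologue */
} FnDesc;

/* What tcg_out_call's look-through amounts to before deciding between
 * a direct branch and an indirect call: */
static const void *descriptor_entry(const void *func)
{
    return ((const FnDesc *)func)->entry;
}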
*/ - c = -tcg_current_code_size(s); - assert(c == (int16_t)c); - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c)); + tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, + -tcg_current_code_size(s)); } break; case INDEX_op_goto_ptr: @@ -2411,7 +2414,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, uint32_t insn = B; if (l->has_value) { - insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr); + insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), + l->u.value_ptr); } else { tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); } @@ -3847,7 +3851,7 @@ static DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { uint8_t *p = &debug_frame.fde_reg_ofs[3]; int i; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index a509a19628..d1339afc66 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -108,6 +108,7 @@ extern bool have_vsx; #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_direct_jump 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -175,7 +176,7 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 4089e29cd9..c60b91ba58 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -425,39 +425,44 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) * Relocations */ -static bool reloc_sbimm12(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + intptr_t offset = (intptr_t)target - (intptr_t)src_rx; - if (offset == sextreg(offset, 1, 12) << 1) { - code_ptr[0] |= encode_sbimm12(offset); + tcg_debug_assert((offset & 1) == 0); + if (offset == sextreg(offset, 0, 12)) { + *src_rw |= encode_sbimm12(offset); return true; } return false; } -static bool reloc_jimm20(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + intptr_t offset = (intptr_t)target - (intptr_t)src_rx; - if (offset == sextreg(offset, 1, 20) << 1) { - code_ptr[0] |= encode_ujimm20(offset); + tcg_debug_assert((offset & 1) == 0); + if (offset == sextreg(offset, 0, 20)) { + *src_rw |= encode_ujimm20(offset); return true; } return false; } -static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target) { - intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + intptr_t offset = (intptr_t)target - (intptr_t)src_rx; int32_t lo = sextreg(offset, 0, 12); int32_t hi = offset - lo; if (offset == hi + lo) { - code_ptr[0] |= encode_uimm20(hi); - code_ptr[1] |= encode_imm12(lo); + src_rw[0] |= encode_uimm20(hi); + src_rw[1] |= encode_imm12(lo); return true; } @@ -467,43 +472,16 @@ static bool reloc_call(tcg_insn_unit *code_ptr, 
tcg_insn_unit *target) static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - uint32_t insn = *code_ptr; - intptr_t diff; - bool short_jmp; - tcg_debug_assert(addend == 0); - switch (type) { case R_RISCV_BRANCH: - diff = value - (uintptr_t)code_ptr; - short_jmp = diff == sextreg(diff, 0, 12); - if (short_jmp) { - return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value); - } else { - /* Invert the condition */ - insn = insn ^ (1 << 12); - /* Clear the offset */ - insn &= 0x01fff07f; - /* Set the offset to the PC + 8 */ - insn |= encode_sbimm12(8); - - /* Move forward */ - code_ptr[0] = insn; - - /* Overwrite the NOP with jal x0,value */ - diff = value - (uintptr_t)(code_ptr + 1); - insn = encode_uj(OPC_JAL, TCG_REG_ZERO, diff); - code_ptr[1] = insn; - - return true; - } - break; + return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value); case R_RISCV_JAL: return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); case R_RISCV_CALL: return reloc_call(code_ptr, (tcg_insn_unit *)value); default: - tcg_abort(); + g_assert_not_reached(); } } @@ -557,7 +535,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, if (tmp == (int32_t)tmp) { tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0); - ret = reloc_call(s->code_ptr - 2, (tcg_insn_unit *)val); + ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val); tcg_debug_assert(ret == true); return; } @@ -777,21 +755,8 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, arg2 = t; } - if (l->has_value) { - intptr_t diff = tcg_pcrel_diff(s, l->u.value_ptr); - if (diff == sextreg(diff, 0, 12)) { - tcg_out_opc_branch(s, op, arg1, arg2, diff); - } else { - /* Invert the conditional branch. */ - tcg_out_opc_branch(s, op ^ (1 << 12), arg1, arg2, 8); - tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, diff - 4); - } - } else { - tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0); - tcg_out_opc_branch(s, op, arg1, arg2, 0); - /* NOP to allow patching later */ - tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); - } + tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0); + tcg_out_opc_branch(s, op, arg1, arg2, 0); } static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, @@ -854,28 +819,21 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, g_assert_not_reached(); } -static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - tcg_debug_assert(offset == sextreg(offset, 1, 20) << 1); - tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); -} - -static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { TCGReg link = tail ? 
TCG_REG_ZERO : TCG_REG_RA; ptrdiff_t offset = tcg_pcrel_diff(s, arg); int ret; - if (offset == sextreg(offset, 1, 20) << 1) { + tcg_debug_assert((offset & 1) == 0); + if (offset == sextreg(offset, 0, 20)) { /* short jump: -2097150 to 2097152 */ tcg_out_opc_jump(s, OPC_JAL, link, offset); - } else if (TCG_TARGET_REG_BITS == 32 || - offset == sextreg(offset, 1, 31) << 1) { + } else if (TCG_TARGET_REG_BITS == 32 || offset == (int32_t)offset) { /* long jump: -2147483646 to 2147483648 */ tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); - ret = reloc_call(s->code_ptr - 2, arg);\ + ret = reloc_call(s->code_ptr - 2, arg); tcg_debug_assert(ret == true); } else if (TCG_TARGET_REG_BITS == 64) { /* far jump: 64-bit */ @@ -888,7 +846,7 @@ static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) } } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) { tcg_out_call_int(s, arg, false); } @@ -962,6 +920,13 @@ QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11)); +static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) +{ + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); + bool ok = reloc_jimm20(s->code_ptr - 1, target); + tcg_debug_assert(ok); +} + static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit **label_ptr, bool is_load) @@ -1007,8 +972,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, /* Compare masked address with the TLB entry. */ label_ptr[0] = s->code_ptr; tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); - /* NOP to allow patching later */ - tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); /* TLB Hit - translate address using addend. */ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1033,7 +996,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, label->datahi_reg = datahi; label->addrlo_reg = addrlo; label->addrhi_reg = addrhi; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr[0]; } @@ -1052,8 +1015,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, - (intptr_t) s->code_ptr, 0)) { + if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1087,8 +1049,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* resolve label address */ - if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, - (intptr_t) s->code_ptr, 0)) { + if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; } @@ -1274,7 +1235,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #endif } -static tcg_insn_unit *tb_ret_addr; +static const tcg_insn_unit *tb_ret_addr; static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) @@ -1288,7 +1249,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_exit_tb: /* Reuse the zeroing that exists for goto_ptr. 
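The corrected range checks in tcg_out_call_int() above pick among three jump shapes, and reloc_call() installs the hi/lo split that the middle one needs: JALR adds a sign-extended 12-bit immediate, so the AUIPC half must absorb the borrow when bit 11 of the offset is set. A compilable sketch of both decisions (enum and helper names invented):

#include <stdbool.h>
#include <stdint.h>

typedef enum { JUMP_JAL, JUMP_AUIPC_JALR, JUMP_MATERIALIZE } JumpKind;

/* hi/lo split for the AUIPC/JALR pair; false once past the +-2 GiB
 * range that the pair can express. */
static bool split_auipc_offset(intptr_t offset, int32_t *hi, int32_t *lo)
{
    *lo = (int32_t)(offset & 0x7ff) - (int32_t)(offset & 0x800);
    *hi = (int32_t)(offset - *lo);              /* multiple of 0x1000 */
    return offset == (intptr_t)*hi + *lo;
}

static JumpKind classify_jump(intptr_t offset)
{
    /* low 20 bits, sign-extended: the sextreg(offset, 0, 20) test */
    intptr_t sext20 = (offset & 0x7ffff) - (offset & 0x80000);
    int32_t hi, lo;

    if (offset == sext20) {
        return JUMP_JAL;                        /* one direct jump */
    }
    if (split_auipc_offset(offset, &hi, &lo)) {
        return JUMP_AUIPC_JALR;                 /* pc-relative pair */
    }
    return JUMP_MATERIALIZE;                    /* load full 64-bit address */
}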
*/ if (a0 == 0) { - tcg_out_call_int(s, s->code_gen_epilogue, true); + tcg_out_call_int(s, tcg_code_gen_epilogue, true); } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); tcg_out_call_int(s, tb_ret_addr, true); @@ -1822,11 +1783,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0); /* Return path for goto_ptr. Set return value to 0 */ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO); /* TB epilogue */ - tb_ret_addr = s->code_ptr; + tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], TCG_REG_SP, SAVE_OFS + i * REG_SIZE); @@ -1907,7 +1868,7 @@ static const DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index c1bd52bb9a..727c8df418 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -119,6 +119,7 @@ typedef enum { #define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_movcond_i64 0 @@ -160,7 +161,7 @@ typedef enum { #endif /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc index c5e096449b..d7ef079055 100644 --- a/tcg/s390/tcg-target.c.inc +++ b/tcg/s390/tcg-target.c.inc @@ -363,36 +363,37 @@ static void * const qemu_st_helpers[16] = { }; #endif -static tcg_insn_unit *tb_ret_addr; +static const tcg_insn_unit *tb_ret_addr; uint64_t s390_facilities; -static bool patch_reloc(tcg_insn_unit *code_ptr, int type, +static bool patch_reloc(tcg_insn_unit *src_rw, int type, intptr_t value, intptr_t addend) { + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); intptr_t pcrel2; uint32_t old; value += addend; - pcrel2 = (tcg_insn_unit *)value - code_ptr; + pcrel2 = (tcg_insn_unit *)value - src_rx; switch (type) { case R_390_PC16DBL: if (pcrel2 == (int16_t)pcrel2) { - tcg_patch16(code_ptr, pcrel2); + tcg_patch16(src_rw, pcrel2); return true; } break; case R_390_PC32DBL: if (pcrel2 == (int32_t)pcrel2) { - tcg_patch32(code_ptr, pcrel2); + tcg_patch32(src_rw, pcrel2); return true; } break; case R_390_20: if (value == sextract64(value, 0, 20)) { - old = *(uint32_t *)code_ptr & 0xf00000ff; + old = *(uint32_t *)src_rw & 0xf00000ff; old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4); - tcg_patch32(code_ptr, old); + tcg_patch32(src_rw, old); return true; } break; @@ -630,7 +631,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } } else if (USE_REG_TB && !in_prologue) { - ptrdiff_t off = sval - (uintptr_t)s->code_gen_ptr; + ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval); if (off == sextract64(off, 0, 20)) { /* This is certain to be an address within TB, and therefore OFF will be negative; don't try RX_LA. 
*/ @@ -655,7 +656,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } else if (USE_REG_TB && !in_prologue) { tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, sval, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); } else { TCGReg base = ret ? ret : TCG_TMP0; tcg_out_insn(s, RIL, LARL, base, 0); @@ -730,7 +731,8 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, } /* load data from an absolute host address */ -static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) +static void tcg_out_ld_abs(TCGContext *s, TCGType type, + TCGReg dest, const void *abs) { intptr_t addr = (intptr_t)abs; @@ -746,7 +748,7 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) } } if (USE_REG_TB) { - ptrdiff_t disp = abs - (void *)s->code_gen_ptr; + ptrdiff_t disp = tcg_tbrel_diff(s, abs); if (disp == sextract64(disp, 0, 20)) { tcg_out_ld(s, type, dest, TCG_REG_TB, disp); return; @@ -956,7 +958,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) { tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); return; } } else { @@ -1015,7 +1017,7 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) } else if (USE_REG_TB) { tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, val, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); } else { /* Perform the OR via sequential modifications to the high and low parts. Do this via recursion to handle 16-bit vs 32-bit @@ -1050,7 +1052,7 @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) } else if (USE_REG_TB) { tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, val, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); } else { /* Perform the xor by parts. */ tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM); @@ -1108,12 +1110,12 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, op = (is_unsigned ? RXY_CLY : RXY_CY); tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2, - 4 - (intptr_t)s->code_gen_ptr); + 4 - tcg_tbrel_diff(s, NULL)); } else { op = (is_unsigned ? 
RXY_CLG : RXY_CG); tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0); new_pool_label(s, c2, R_390_20, s->code_ptr - 2, - -(intptr_t)s->code_gen_ptr); + tcg_tbrel_diff(s, NULL)); } goto exit; } else { @@ -1302,9 +1304,9 @@ static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src, tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1); } -static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) +static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest) { - ptrdiff_t off = dest - s->code_ptr; + ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; if (off == (int16_t)off) { tcg_out_insn(s, RI, BRC, cc, off); } else if (off == (int32_t)off) { @@ -1333,34 +1335,18 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, TCGLabel *l) { - intptr_t off = 0; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - tcg_debug_assert(off == (int16_t)off); - } else { - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); - } - + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); - tcg_out16(s, off); + tcg_out16(s, 0); tcg_out16(s, cc << 12 | (opc & 0xff)); } static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, int i2, TCGLabel *l) { - tcg_target_long off = 0; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - tcg_debug_assert(off == (int16_t)off); - } else { - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); - } - + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); - tcg_out16(s, off); + tcg_out16(s, 0); tcg_out16(s, (i2 << 8) | (opc & 0xff)); } @@ -1415,9 +1401,9 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, tgen_branch(s, cc, l); } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) { - ptrdiff_t off = dest - s->code_ptr; + ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; if (off == (int32_t)off) { tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); } else { @@ -1601,7 +1587,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, label->oi = oi; label->datalo_reg = data; label->addrlo_reg = addr; - label->raddr = raddr; + label->raddr = tcg_splitwx_to_rx(raddr); label->label_ptr[0] = label_ptr; } @@ -1613,7 +1599,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2)) { + (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } @@ -1638,7 +1624,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) MemOp opc = get_memop(oi); if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL, - (intptr_t)s->code_ptr, 2)) { + (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { return false; } @@ -1756,7 +1742,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, /* Reuse the zeroing that exists for goto_ptr. 
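tgen_gotoi() and tcg_out_call() above now compute tcg_pcrel_diff(s, dest) >> 1 because s390 relative branches encode their offset in 2-byte halfwords; the BRC-versus-BRCL/BRASL choice is made on the shifted value. In sketch form:

#include <stdint.h>

static int fits_brc(intptr_t byte_diff)     /* 16-bit halfword field */
{
    intptr_t hw = byte_diff >> 1;
    return hw == (int16_t)hw;
}

static int fits_brcl(intptr_t byte_diff)    /* 32-bit halfword field */
{
    intptr_t hw = byte_diff >> 1;
    return hw == (int32_t)hw;
}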
*/ a0 = args[0]; if (a0 == 0) { - tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue); + tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue); } else { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); @@ -1766,7 +1752,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: a0 = args[0]; if (s->tb_jmp_insn_offset) { - /* branch displacement must be aligned for atomic patching; + /* + * branch displacement must be aligned for atomic patching; * see if we need to add extra nop before branch */ if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { @@ -1779,7 +1766,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else { /* load address stored at s->tb_jmp_target_addr + a0 */ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB, - s->tb_jmp_target_addr + a0); + tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0)); /* and go there */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB); } @@ -1789,8 +1776,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, TCG_REG_TB to the beginning of this TB. */ if (USE_REG_TB) { int ofs = -tcg_current_code_size(s); - assert(ofs == (int16_t)ofs); - tcg_out_insn(s, RI, AGHI, TCG_REG_TB, ofs); + /* All TB are restricted to 64KiB by unwind info. */ + tcg_debug_assert(ofs == sextract64(ofs, 0, 20)); + tcg_out_insn(s, RXY, LAY, TCG_REG_TB, + TCG_REG_TB, TCG_REG_NONE, ofs); } break; @@ -2561,11 +2550,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) * Return path for goto_ptr. Set return value to 0, a-la exit_tb, * and fall through to the rest of the epilogue. */ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0); /* TB epilogue */ - tb_ret_addr = s->code_ptr; + tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, @@ -2620,7 +2609,7 @@ static const DebugFrame debug_frame = { } }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index b4feb2f55a..641464eea4 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -97,6 +97,7 @@ extern uint64_t s390_facilities; #define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT) +#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 @@ -145,12 +146,12 @@ enum { TCG_AREG0 = TCG_REG_R10, }; -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, - uintptr_t jmp_addr, uintptr_t addr) +static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { /* patch the branch destination */ - intptr_t disp = addr - (jmp_addr - 2); - qatomic_set((int32_t *)jmp_addr, disp / 2); + intptr_t disp = addr - (jmp_rx - 2); + qatomic_set((int32_t *)jmp_rw, disp / 2); /* no need to flush icache explicitly */ } diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 6775bd30fc..922ae96481 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -291,14 +291,15 @@ static inline int check_fit_i32(int32_t val, unsigned int bits) # define check_fit_ptr check_fit_i32 #endif -static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 
+static bool patch_reloc(tcg_insn_unit *src_rw, int type, intptr_t value, intptr_t addend) { - uint32_t insn = *code_ptr; + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + uint32_t insn = *src_rw; intptr_t pcrel; value += addend; - pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); + pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx); switch (type) { case R_SPARC_WDISP16: @@ -315,7 +316,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, g_assert_not_reached(); } - *code_ptr = insn; + *src_rw = insn; return true; } @@ -440,7 +441,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, /* A 13-bit constant relative to the TB. */ if (!in_prologue && USE_REG_TB) { - test = arg - (uintptr_t)s->code_gen_ptr; + test = tcg_tbrel_diff(s, (void *)arg); if (check_fit_ptr(test, 13)) { tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); return; @@ -537,15 +538,15 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) { - intptr_t diff = arg - (uintptr_t)s->code_gen_ptr; + intptr_t diff = tcg_tbrel_diff(s, arg); if (USE_REG_TB && check_fit_ptr(diff, 13)) { tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); return; } - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); + tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); } static inline void tcg_out_sety(TCGContext *s, TCGReg rs) @@ -840,7 +841,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); } -static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest, +static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue) { ptrdiff_t disp = tcg_pcrel_diff(s, dest); @@ -855,7 +856,7 @@ static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest, } } -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) { tcg_out_call_nodelay(s, dest, false); tcg_out_nop(s); @@ -868,8 +869,8 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) } #ifdef CONFIG_SOFTMMU -static tcg_insn_unit *qemu_ld_trampoline[16]; -static tcg_insn_unit *qemu_st_trampoline[16]; +static const tcg_insn_unit *qemu_ld_trampoline[16]; +static const tcg_insn_unit *qemu_st_trampoline[16]; static void emit_extend(TCGContext *s, TCGReg r, int op) { @@ -930,7 +931,7 @@ static void build_trampolines(TCGContext *s) while ((uintptr_t)s->code_ptr & 15) { tcg_out_nop(s); } - qemu_ld_trampoline[i] = s->code_ptr; + qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); if (SPARC64 || TARGET_LONG_BITS == 32) { ra = TCG_REG_O3; @@ -958,7 +959,7 @@ static void build_trampolines(TCGContext *s) while ((uintptr_t)s->code_ptr & 15) { tcg_out_nop(s); } - qemu_st_trampoline[i] = s->code_ptr; + qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); if (SPARC64) { emit_extend(s, TCG_REG_O2, i); @@ -1038,7 +1039,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_nop(s); /* Epilogue for goto_ptr. 
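build_trampolines() above records each qemu_ld/st trampoline as the rx alias of a 16-byte-aligned point in the buffer: the padding nops are emitted through the write-side pointer, but the address stored for later calls must be the executable one. The alignment arithmetic, in sketch form:

#include <stdint.h>

/* Number of nops needed to advance code_ptr to a 16-byte boundary. */
static unsigned pad_to_16(uintptr_t code_ptr_rw)
{
    return (unsigned)(-code_ptr_rw & 15);
}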
*/ - s->code_gen_epilogue = s->code_ptr; + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); /* delay slot */ tcg_out_movi_imm13(s, TCG_REG_O0, 0); @@ -1163,7 +1164,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; - tcg_insn_unit *func; + const tcg_insn_unit *func; tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1245,7 +1246,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; - tcg_insn_unit *func; + const tcg_insn_unit *func; tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1313,7 +1314,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_movi_imm13(s, TCG_REG_O0, a0); break; } else if (USE_REG_TB) { - intptr_t tb_diff = a0 - (uintptr_t)s->code_gen_ptr; + intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); if (check_fit_ptr(tb_diff, 13)) { tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); /* Note that TCG_REG_TB has been unwound to O1. */ @@ -1345,8 +1346,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } } else { /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_TB, - (uintptr_t)(s->tb_jmp_target_addr + a0)); + tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); tcg_out_nop(s); } @@ -1816,16 +1816,16 @@ static const DebugFrame debug_frame = { .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ }; -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, - uintptr_t addr) +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { intptr_t tb_disp = addr - tc_ptr; - intptr_t br_disp = addr - jmp_addr; + intptr_t br_disp = addr - jmp_rx; tcg_insn_unit i1, i2; /* We can reach the entire address space for ILP32. 
@@ -1834,9 +1834,9 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, tcg_debug_assert(br_disp == (int32_t)br_disp); if (!USE_REG_TB) { - qatomic_set((uint32_t *)jmp_addr, + qatomic_set((uint32_t *)jmp_rw, deposit32(CALL, 0, 30, br_disp >> 2)); - flush_icache_range(jmp_addr, jmp_addr + 4); + flush_idcache_range(jmp_rx, jmp_rw, 4); return; } @@ -1859,6 +1859,6 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, | INSN_IMM13((tb_disp & 0x3ff) | -0x400)); } - qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1)); - flush_icache_range(jmp_addr, jmp_addr + 8); + qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1)); + flush_idcache_range(jmp_rx, jmp_rw, 8); } diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index d8b0e32e2e..95ab9af955 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -126,6 +126,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_direct_jump 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 #define TCG_TARGET_HAS_extrh_i64_i32 1 @@ -168,7 +169,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc index 05f9b3ccd6..c3ce88e69d 100644 --- a/tcg/tcg-ldst.c.inc +++ b/tcg/tcg-ldst.c.inc @@ -28,7 +28,7 @@ typedef struct TCGLabelQemuLdst { TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ - tcg_insn_unit *raddr; /* gen code addr of the next IR of qemu_ld/st IR */ + const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; } TCGLabelQemuLdst; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 4b8a473fad..0374b5d56d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2664,9 +2664,20 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) /* QEMU specific operations. */ -void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx) -{ - uintptr_t val = (uintptr_t)tb + idx; +void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) +{ + /* + * Let the jit code return the read-only version of the + * TranslationBlock, so that we minimize the pc-relative + * distance of the address of the exit_tb code to TB. + * This will improve utilization of pc-relative address loads. + * + * TODO: Move this to translator_loop, so that all const + * TranslationBlock pointers refer to read-only memory. + * This requires coordination with targets that do not use + * the translator_loop. 
+ */ + uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx; if (tb == NULL) { tcg_debug_assert(idx == 0); @@ -2883,7 +2894,11 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) } addr = plugin_prep_mem_callbacks(addr); - gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); + if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) { + gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx); + } else { + gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); + } plugin_gen_mem_callbacks(addr, info); if (swap) { diff --git a/tcg/tcg-pool.c.inc b/tcg/tcg-pool.c.inc index 82cbcc89bd..90c2e63b7f 100644 --- a/tcg/tcg-pool.c.inc +++ b/tcg/tcg-pool.c.inc @@ -140,6 +140,8 @@ static int tcg_out_pool_finalize(TCGContext *s) for (; p != NULL; p = p->next) { size_t size = sizeof(tcg_target_ulong) * p->nlong; + uintptr_t value; + if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { if (unlikely(a > s->code_gen_highwater)) { return -1; @@ -148,7 +150,9 @@ a += size; l = p; } - if (!patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend)) { + + value = (uintptr_t)tcg_splitwx_to_rx(a) - size; + if (!patch_reloc(p->label, p->rtype, value, p->addend)) { return -2; } } diff --git a/tcg/tcg.c b/tcg/tcg.c --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -97,7 +97,7 @@ typedef struct QEMU_PACKED { DebugFrameFDEHeader fde; } DebugFrameHeader; -static void tcg_register_jit_int(void *buf, size_t size, +static void tcg_register_jit_int(const void *buf, size_t size, const void *debug_frame, size_t debug_frame_size) __attribute__((unused)); @@ -149,7 +149,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, TCGReg base, intptr_t ofs); -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); static int tcg_target_const_match(tcg_target_long val, TCGType type, const TCGArgConstraint *arg_ct); #ifdef TCG_TARGET_NEED_LDST_LABELS @@ -161,6 +161,12 @@ static int tcg_out_ldst_finalize(TCGContext *s); static TCGContext **tcg_ctxs; static unsigned int n_tcg_ctxs; TCGv_env cpu_env = 0; +const void *tcg_code_gen_epilogue; +uintptr_t tcg_splitwx_diff; + +#ifndef CONFIG_TCG_INTERPRETER +tcg_prologue_fn *tcg_qemu_tb_exec; +#endif struct tcg_region_tree { QemuMutex lock; @@ -296,11 +302,11 @@ static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); } -static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) +static void tcg_out_label(TCGContext *s, TCGLabel *l) { tcg_debug_assert(!l->has_value); l->has_value = 1; - l->u.value_ptr = ptr; + l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); } TCGLabel *gen_new_label(void) @@ -401,8 +407,9 @@ static void tcg_region_trees_init(void) } } -static struct tcg_region_tree *tc_ptr_to_region_tree(void *p) +static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp) { + void *p = tcg_splitwx_to_rw(cp); size_t region_idx; if (p < region.start_aligned) { @@ -696,6 +703,7 @@ void tcg_region_init(void) size_t region_size; size_t n_regions; size_t i; + uintptr_t splitwx_diff; n_regions = tcg_n_regions(); @@ -726,6 +734,7 @@ void tcg_region_init(void) region.end -= page_size; /* set guard pages */ + splitwx_diff = tcg_splitwx_diff; for (i = 0; i < region.n; i++) { void *start, *end; int rc; @@ -733,6 +742,10 @@ void tcg_region_init(void) tcg_region_bounds(i, &start, &end); rc = 
qemu_mprotect_none(end, page_size); g_assert(!rc); + if (splitwx_diff) { + rc = qemu_mprotect_none(end + splitwx_diff, page_size); + g_assert(!rc); + } } tcg_region_trees_init(); @@ -747,6 +760,29 @@ void tcg_region_init(void) #endif } +#ifdef CONFIG_DEBUG_TCG +const void *tcg_splitwx_to_rx(void *rw) +{ + /* Pass NULL pointers unchanged. */ + if (rw) { + g_assert(in_code_gen_buffer(rw)); + rw += tcg_splitwx_diff; + } + return rw; +} + +void *tcg_splitwx_to_rw(const void *rx) +{ + /* Pass NULL pointers unchanged. */ + if (rx) { + rx -= tcg_splitwx_diff; + /* Assert that we end with a pointer in the rw region. */ + g_assert(in_code_gen_buffer(rx)); + } + return (void *)rx; +} +#endif /* CONFIG_DEBUG_TCG */ + static void alloc_tcg_plugin_context(TCGContext *s) { #ifdef CONFIG_PLUGIN @@ -1055,7 +1091,17 @@ void tcg_prologue_init(TCGContext *s) s->code_ptr = buf0; s->code_buf = buf0; s->data_gen_ptr = NULL; - s->code_gen_prologue = buf0; + + /* + * The region trees are not yet configured, but tcg_splitwx_to_rx + * needs the bounds for an assert. + */ + region.start = buf0; + region.end = buf0 + total_size; + +#ifndef CONFIG_TCG_INTERPRETER + tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0); +#endif /* Compute a high-water mark, at which we voluntarily flush the buffer and start over. The size here is arbitrary, significantly larger @@ -1078,7 +1124,10 @@ void tcg_prologue_init(TCGContext *s) #endif buf1 = s->code_ptr; - flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); +#ifndef CONFIG_TCG_INTERPRETER + flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0, + tcg_ptr_byte_diff(buf1, buf0)); +#endif /* Deduct the prologue from the buffer. */ prologue_size = tcg_current_code_size(s); @@ -1088,7 +1137,7 @@ void tcg_prologue_init(TCGContext *s) total_size -= prologue_size; s->code_gen_buffer_size = total_size; - tcg_register_jit(s->code_gen_buffer, total_size); + tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size); #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { @@ -1123,7 +1172,7 @@ void tcg_prologue_init(TCGContext *s) /* Assert that goto_ptr is implemented completely. */ if (TCG_TARGET_HAS_goto_ptr) { - tcg_debug_assert(s->code_gen_epilogue != NULL); + tcg_debug_assert(tcg_code_gen_epilogue != NULL); } } @@ -1427,6 +1476,9 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_qemu_st_i64: return true; + case INDEX_op_qemu_st8_i32: + return TCG_TARGET_HAS_qemu_st8_i32; + case INDEX_op_goto_ptr: return TCG_TARGET_HAS_goto_ptr; @@ -2087,6 +2139,7 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) break; case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: { @@ -4216,8 +4269,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) tcg_reg_alloc_start(s); - s->code_buf = tb->tc.ptr; - s->code_ptr = tb->tc.ptr; + /* + * Reset the buffer pointers when restarting after overflow. + * TODO: Move this into translate-all.c with the rest of the + * buffer management. Having only this done here is confusing. 
+ */ + s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); + s->code_ptr = s->code_buf; #ifdef TCG_TARGET_NEED_LDST_LABELS QSIMPLEQ_INIT(&s->ldst_labels); #endif @@ -4271,7 +4329,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) break; case INDEX_op_set_label: tcg_reg_alloc_bb_end(s, s->reserved_regs); - tcg_out_label(s, arg_label(op->args[0]), s->code_ptr); + tcg_out_label(s, arg_label(op->args[0])); break; case INDEX_op_call: tcg_reg_alloc_call(s, op); @@ -4320,8 +4378,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) return -2; } +#ifndef CONFIG_TCG_INTERPRETER /* flush instruction cache */ - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), + (uintptr_t)s->code_buf, + tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); +#endif return tcg_current_code_size(s); } @@ -4449,7 +4511,7 @@ static int find_string(const char *strtab, const char *str) } } -static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, +static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, const void *debug_frame, size_t debug_frame_size) { @@ -4651,13 +4713,13 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, /* No support for the feature. Provide the entry point expected by exec.c, and implement the internal function we declared earlier. */ -static void tcg_register_jit_int(void *buf, size_t size, +static void tcg_register_jit_int(const void *buf, size_t size, const void *debug_frame, size_t debug_frame_size) { } -void tcg_register_jit(void *buf, size_t buf_size) +void tcg_register_jit(const void *buf, size_t buf_size) { } #endif /* ELF_HOST_MACHINE */ diff --git a/tcg/tci.c b/tcg/tci.c --- a/tcg/tci.c +++ b/tcg/tci.c @@ -164,34 +164,34 @@ static uint64_t tci_uint64(uint32_t high, uint32_t low) #endif /* Read constant (native size) from bytecode. */ -static tcg_target_ulong tci_read_i(uint8_t **tb_ptr) +static tcg_target_ulong tci_read_i(const uint8_t **tb_ptr) { - tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr); + tcg_target_ulong value = *(const tcg_target_ulong *)(*tb_ptr); *tb_ptr += sizeof(value); return value; } /* Read unsigned constant (32 bit) from bytecode. */ -static uint32_t tci_read_i32(uint8_t **tb_ptr) +static uint32_t tci_read_i32(const uint8_t **tb_ptr) { - uint32_t value = *(uint32_t *)(*tb_ptr); + uint32_t value = *(const uint32_t *)(*tb_ptr); *tb_ptr += sizeof(value); return value; } /* Read signed constant (32 bit) from bytecode. */ -static int32_t tci_read_s32(uint8_t **tb_ptr) +static int32_t tci_read_s32(const uint8_t **tb_ptr) { - int32_t value = *(int32_t *)(*tb_ptr); + int32_t value = *(const int32_t *)(*tb_ptr); *tb_ptr += sizeof(value); return value; } #if TCG_TARGET_REG_BITS == 64 /* Read constant (64 bit) from bytecode. */ -static uint64_t tci_read_i64(uint8_t **tb_ptr) +static uint64_t tci_read_i64(const uint8_t **tb_ptr) { - uint64_t value = *(uint64_t *)(*tb_ptr); + uint64_t value = *(const uint64_t *)(*tb_ptr); *tb_ptr += sizeof(value); return value; } @@ -199,7 +199,7 @@ static uint64_t tci_read_i64(uint8_t **tb_ptr) /* Read indexed register (native size) from bytecode. */ static tcg_target_ulong -tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr) +tci_read_r(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { tcg_target_ulong value = tci_read_reg(regs, **tb_ptr); *tb_ptr += 1; @@ -207,7 +207,7 @@ tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr) } /* Read indexed register (8 bit) from bytecode. 
*/ -static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { uint8_t value = tci_read_reg8(regs, **tb_ptr); *tb_ptr += 1; @@ -216,7 +216,7 @@ static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr) #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 /* Read indexed register (8 bit signed) from bytecode. */ -static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { int8_t value = tci_read_reg8s(regs, **tb_ptr); *tb_ptr += 1; @@ -225,7 +225,8 @@ static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr) #endif /* Read indexed register (16 bit) from bytecode. */ -static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint16_t tci_read_r16(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint16_t value = tci_read_reg16(regs, **tb_ptr); *tb_ptr += 1; @@ -234,7 +235,8 @@ static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr) #if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 /* Read indexed register (16 bit signed) from bytecode. */ -static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static int16_t tci_read_r16s(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { int16_t value = tci_read_reg16s(regs, **tb_ptr); *tb_ptr += 1; @@ -243,7 +245,8 @@ static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr) #endif /* Read indexed register (32 bit) from bytecode. */ -static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint32_t tci_read_r32(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint32_t value = tci_read_reg32(regs, **tb_ptr); *tb_ptr += 1; @@ -252,14 +255,16 @@ static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr) #if TCG_TARGET_REG_BITS == 32 /* Read two indexed registers (2 * 32 bit) from bytecode. */ -static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint64_t tci_read_r64(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint32_t low = tci_read_r32(regs, tb_ptr); return tci_uint64(tci_read_r32(regs, tb_ptr), low); } #elif TCG_TARGET_REG_BITS == 64 /* Read indexed register (32 bit signed) from bytecode. */ -static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static int32_t tci_read_r32s(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { int32_t value = tci_read_reg32s(regs, **tb_ptr); *tb_ptr += 1; @@ -267,7 +272,8 @@ static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr) } /* Read indexed register (64 bit) from bytecode. */ -static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint64_t tci_read_r64(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint64_t value = tci_read_reg64(regs, **tb_ptr); *tb_ptr += 1; @@ -277,7 +283,7 @@ static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr) /* Read indexed register(s) with target address from bytecode. 
*/ static target_ulong -tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr) +tci_read_ulong(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { target_ulong taddr = tci_read_r(regs, tb_ptr); #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS @@ -288,7 +294,7 @@ tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr) /* Read indexed register or constant (native size) from bytecode. */ static tcg_target_ulong -tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr) +tci_read_ri(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { tcg_target_ulong value; TCGReg r = **tb_ptr; @@ -302,7 +308,8 @@ tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr) } /* Read indexed register or constant (32 bit) from bytecode. */ -static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint32_t tci_read_ri32(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint32_t value; TCGReg r = **tb_ptr; @@ -317,14 +324,16 @@ static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr) #if TCG_TARGET_REG_BITS == 32 /* Read two indexed registers or constants (2 * 32 bit) from bytecode. */ -static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint32_t low = tci_read_ri32(regs, tb_ptr); return tci_uint64(tci_read_ri32(regs, tb_ptr), low); } #elif TCG_TARGET_REG_BITS == 64 /* Read indexed register or constant (64 bit) from bytecode. */ -static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) +static uint64_t tci_read_ri64(const tcg_target_ulong *regs, + const uint8_t **tb_ptr) { uint64_t value; TCGReg r = **tb_ptr; @@ -338,7 +347,7 @@ static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr) } #endif -static tcg_target_ulong tci_read_label(uint8_t **tb_ptr) +static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) { tcg_target_ulong label = tci_read_i(tb_ptr); tci_assert(label != 0); @@ -481,9 +490,10 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) * One possible operation in the pseudo code is a call to binary code. 
* Therefore, disable CFI checks in the interpreter function */ -QEMU_DISABLE_CFI -uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) +uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, + const void *v_tb_ptr) { + const uint8_t *tb_ptr = v_tb_ptr; tcg_target_ulong regs[TCG_TARGET_NB_REGS]; long tcg_temps[CPU_TEMP_BUF_NLONGS]; uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); @@ -497,7 +507,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) TCGOpcode opc = tb_ptr[0]; #if defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) uint8_t op_size = tb_ptr[1]; - uint8_t *old_code_ptr = tb_ptr; + const uint8_t *old_code_ptr = tb_ptr; #endif tcg_target_ulong t0; tcg_target_ulong t1; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 231b9b1775..d5a4d9d37c 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -545,7 +545,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, old_code_ptr[1] = s->code_ptr - old_code_ptr; } -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) { uint8_t *old_code_ptr = s->code_ptr; tcg_out_op_t(s, INDEX_op_call); diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index b84480f989..bb784e018e 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -88,6 +88,7 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_goto_ptr 0 #define TCG_TARGET_HAS_direct_jump 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extrl_i64_i32 0 @@ -198,11 +199,11 @@ void tci_disas(uint8_t opc); #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, - uintptr_t jmp_addr, uintptr_t addr) +static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) { /* patch the branch destination */ - qatomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4)); + qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); /* no need to flush icache explicitly */ } diff --git a/util/cacheflush.c b/util/cacheflush.c index 2881832a38..6a20723902 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -7,12 +7,81 @@ #include "qemu/osdep.h" #include "qemu/cacheflush.h" +#include "qemu/bitops.h" #if defined(__i386__) || defined(__x86_64__) || defined(__s390__) /* Caches are coherent and do not require flushing; symbol inline. */ +#elif defined(__aarch64__) + +#ifdef CONFIG_DARWIN +/* Apple does not expose CTR_EL0, so we must use system interfaces. */ +extern void sys_icache_invalidate(void *start, size_t len); +extern void sys_dcache_flush(void *start, size_t len); +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) +{ + sys_dcache_flush((void *)rw, len); + sys_icache_invalidate((void *)rx, len); +} +#else + +/* + * TODO: unify this with cacheinfo.c. + * We want to save the whole contents of CTR_EL0, so that we + * have more than the linesize, but also IDC and DIC. + */ +static unsigned int save_ctr_el0; +static void __attribute__((constructor)) init_ctr_el0(void) +{ + asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0)); +} + +/* + * This is a copy of gcc's __aarch64_sync_cache_range, modified + * to fit this three-operand interface. 
+ */ +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) +{ + const unsigned CTR_IDC = 1u << 28; + const unsigned CTR_DIC = 1u << 29; + const unsigned int ctr_el0 = save_ctr_el0; + const uintptr_t icache_lsize = 4 << extract32(ctr_el0, 0, 4); + const uintptr_t dcache_lsize = 4 << extract32(ctr_el0, 16, 4); + uintptr_t p; + + /* + * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification + * is not required for instruction to data coherence. + */ + if (!(ctr_el0 & CTR_IDC)) { + /* + * Loop over the address range, clearing one cache line at once. + * Data cache must be flushed to unification first to make sure + * the instruction cache fetches the updated data. + */ + for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) { + asm volatile("dc\tcvau, %0" : : "r" (p) : "memory"); + } + asm volatile("dsb\tish" : : : "memory"); + } + + /* + * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point + * of Unification is not required for instruction to data coherence. + */ + if (!(ctr_el0 & CTR_DIC)) { + for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) { + asm volatile("ic\tivau, %0" : : "r"(p) : "memory"); + } + asm volatile ("dsb\tish" : : : "memory"); + } + + asm volatile("isb" : : : "memory"); +} +#endif /* CONFIG_DARWIN */ + #elif defined(__mips__) #ifdef __OpenBSD__ @@ -21,29 +90,32 @@ #include <sys/cachectl.h> #endif -void flush_icache_range(uintptr_t start, uintptr_t stop) +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { - cacheflush((void *)start, stop - start, ICACHE); + if (rx != rw) { + cacheflush((void *)rw, len, DCACHE); + } + cacheflush((void *)rx, len, ICACHE); } #elif defined(__powerpc__) -void flush_icache_range(uintptr_t start, uintptr_t stop) +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { - uintptr_t p, start1, stop1; + uintptr_t p, b, e; size_t dsize = qemu_dcache_linesize; size_t isize = qemu_icache_linesize; - start1 = start & ~(dsize - 1); - stop1 = (stop + dsize - 1) & ~(dsize - 1); - for (p = start1; p < stop1; p += dsize) { + b = rw & ~(dsize - 1); + e = (rw + len + dsize - 1) & ~(dsize - 1); + for (p = b; p < e; p += dsize) { asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); } asm volatile ("sync" : : : "memory"); - start &= start & ~(isize - 1); - stop1 = (stop + isize - 1) & ~(isize - 1); - for (p = start1; p < stop1; p += isize) { + b = rx & ~(isize - 1); + e = (rx + len + isize - 1) & ~(isize - 1); + for (p = b; p < e; p += isize) { asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); } asm volatile ("sync" : : : "memory"); @@ -52,20 +124,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop) #elif defined(__sparc__) -void flush_icache_range(uintptr_t start, uintptr_t stop) +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { - uintptr_t p; - - for (p = start & -8; p < ((stop + 7) & -8); p += 8) { + /* No additional data flush to the RW virtual address required. 
*/ + uintptr_t p, end = (rx + len + 7) & -8; + for (p = rx & -8; p < end; p += 8) { __asm__ __volatile__("flush\t%0" : : "r" (p)); } } #else -void flush_icache_range(uintptr_t start, uintptr_t stop) +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { - __builtin___clear_cache((char *)start, (char *)stop); + if (rw != rx) { + __builtin___clear_cache((char *)rw, (char *)rw + len); + } + __builtin___clear_cache((char *)rx, (char *)rx + len); } #endif diff --git a/util/cacheinfo.c b/util/cacheinfo.c index 7804c186b6..b182f0b693 100644 --- a/util/cacheinfo.c +++ b/util/cacheinfo.c @@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize) *isize = *dsize; } else { #if defined(_ARCH_PPC) - /* For PPC, we're going to use the icache size computed for - flush_icache_range. Which means that we must use the - architecture minimum. */ + /* + * For PPC, we're going to use the cache sizes computed for + * flush_idcache_range. Which means that we must use the + * architecture minimum. + */ *isize = *dsize = 16; #else /* Otherwise, 64 bytes is not uncommon. */ diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f1e2801b11..359c52df12 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -201,6 +201,8 @@ void *qemu_try_memalign(size_t alignment, size_t size) if (alignment < sizeof(void*)) { alignment = sizeof(void*); + } else { + g_assert(is_power_of_2(alignment)); } #if defined(CONFIG_POSIX_MEMALIGN) diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 01787df74c..e6f83e10ed 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -39,6 +39,7 @@ #include "trace.h" #include "qemu/sockets.h" #include "qemu/cutils.h" +#include <malloc.h> /* this must come after including "trace.h" */ #include <shlobj.h> @@ -56,10 +57,9 @@ void *qemu_try_memalign(size_t alignment, size_t size) { void *ptr; - if (!size) { - abort(); - } - ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + g_assert(size != 0); + g_assert(is_power_of_2(alignment)); + ptr = _aligned_malloc(size, alignment); trace_qemu_memalign(alignment, size, ptr); return ptr; } @@ -93,9 +93,7 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared) void qemu_vfree(void *ptr) { trace_qemu_vfree(ptr); - if (ptr) { - VirtualFree(ptr, 0, MEM_RELEASE); - } + _aligned_free(ptr); } void qemu_anon_ram_free(void *ptr, size_t size)
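A closing note on the allocator change: _aligned_malloc() takes the size first and the alignment second, the alignment must be a power of two (which the new g_assert(is_power_of_2(alignment)) checks enforce on both the POSIX and Win32 paths), and the returned block may only be released with _aligned_free(), never free(), which is why qemu_vfree() is rewritten as well. A standalone usage sketch, independent of QEMU and buildable with any mingw-w64 or MSVC toolchain:

#include <malloc.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const size_t alignment = 64;     /* must be a power of two */

    /* Argument order is (size, alignment). */
    void *buf = _aligned_malloc(4096, alignment);
    if (buf == NULL) {
        return 1;
    }

    printf("aligned: %s\n",
           (uintptr_t)buf % alignment == 0 ? "yes" : "no");

    _aligned_free(buf);              /* free(buf) here would be invalid */
    return 0;
}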