aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-01-07 20:34:05 +0000
committerPeter Maydell <peter.maydell@linaro.org>2021-01-07 20:34:05 +0000
commite79de63ab1bd1f6550e7b915e433bec1ad1a870a (patch)
treead29060323ecea1b9a0f60d5b08984f310b30e44
parent470dd6bd360782f5137f7e3376af6a44658eb1d3 (diff)
parente5e2e4c73926f6f3c1f5da24a350e4345d5ad232 (diff)
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210107' into staging
Build fix for ppc64 centos7. Reduce the use of scratch registers for tcg/i386. Use _aligned_malloc for Win32. Enable split w^x code gen buffers. # gpg: Signature made Thu 07 Jan 2021 20:06:38 GMT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * remotes/rth-gitlab/tags/pull-tcg-20210107: (47 commits) tcg: Constify TCGLabelQemuLdst.raddr tcg: Constify tcg_code_gen_epilogue tcg: Remove TCG_TARGET_SUPPORT_MIRROR tcg/arm: Support split-wx code generation tcg/mips: Support split-wx code generation tcg/mips: Do not assert on relocation overflow accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd tcg/riscv: Support split-wx code generation tcg/riscv: Remove branch-over-branch fallback tcg/riscv: Fix branch range checks tcg/s390: Support split-wx code generation tcg/s390: Use tcg_tbrel_diff tcg/sparc: Support split-wx code generation tcg/sparc: Use tcg_tbrel_diff tcg/ppc: Support split-wx code generation tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB tcg/ppc: Use tcg_tbrel_diff tcg: Introduce tcg_tbrel_diff tcg/tci: Push const down through bytecode reading disas: Push const down through host disassembly ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--accel/tcg/cpu-exec.c41
-rw-r--r--accel/tcg/tcg-all.c26
-rw-r--r--accel/tcg/tcg-runtime.c4
-rw-r--r--accel/tcg/tcg-runtime.h2
-rw-r--r--accel/tcg/trace-events2
-rw-r--r--accel/tcg/translate-all.c305
-rw-r--r--accel/tcg/translator.c4
-rw-r--r--bsd-user/main.c2
-rw-r--r--disas.c2
-rw-r--r--disas/capstone.c2
-rw-r--r--include/disas/dis-asm.h4
-rw-r--r--include/disas/disas.h2
-rw-r--r--include/exec/exec-all.h2
-rw-r--r--include/exec/gen-icount.h4
-rw-r--r--include/exec/log.h2
-rw-r--r--include/exec/translator.h2
-rw-r--r--include/hw/core/cpu.h3
-rw-r--r--include/qemu/cacheflush.h15
-rw-r--r--include/sysemu/tcg.h3
-rw-r--r--include/tcg/tcg-op.h2
-rw-r--r--include/tcg/tcg-opc.h5
-rw-r--r--include/tcg/tcg.h61
-rw-r--r--linux-user/ioctls.h2
-rw-r--r--linux-user/main.c2
-rw-r--r--qemu-options.hx7
-rw-r--r--softmmu/physmem.c2
-rw-r--r--target/arm/cpu.c3
-rw-r--r--target/arm/translate-a64.c2
-rw-r--r--target/avr/cpu.c3
-rw-r--r--target/hppa/cpu.c3
-rw-r--r--target/i386/tcg/tcg-cpu.c3
-rw-r--r--target/microblaze/cpu.c3
-rw-r--r--target/mips/cpu.c3
-rw-r--r--target/riscv/cpu.c3
-rw-r--r--target/rx/cpu.c3
-rw-r--r--target/sh4/cpu.c3
-rw-r--r--target/sparc/cpu.c3
-rw-r--r--target/tricore/cpu.c2
-rw-r--r--tcg/README5
-rw-r--r--tcg/aarch64/tcg-target.c.inc75
-rw-r--r--tcg/aarch64/tcg-target.h3
-rw-r--r--tcg/arm/tcg-target.c.inc41
-rw-r--r--tcg/arm/tcg-target.h3
-rw-r--r--tcg/i386/tcg-target.c.inc174
-rw-r--r--tcg/i386/tcg-target.h12
-rw-r--r--tcg/mips/tcg-target.c.inc97
-rw-r--r--tcg/mips/tcg-target.h3
-rw-r--r--tcg/optimize.c1
-rw-r--r--tcg/ppc/tcg-target.c.inc88
-rw-r--r--tcg/ppc/tcg-target.h3
-rw-r--r--tcg/riscv/tcg-target.c.inc125
-rw-r--r--tcg/riscv/tcg-target.h3
-rw-r--r--tcg/s390/tcg-target.c.inc91
-rw-r--r--tcg/s390/tcg-target.h9
-rw-r--r--tcg/sparc/tcg-target.c.inc58
-rw-r--r--tcg/sparc/tcg-target.h3
-rw-r--r--tcg/tcg-ldst.c.inc2
-rw-r--r--tcg/tcg-op.c23
-rw-r--r--tcg/tcg-pool.c.inc6
-rw-r--r--tcg/tcg.c94
-rw-r--r--tcg/tci.c62
-rw-r--r--tcg/tci/tcg-target.c.inc2
-rw-r--r--tcg/tci/tcg-target.h7
-rw-r--r--util/cacheflush.c107
-rw-r--r--util/cacheinfo.c8
-rw-r--r--util/oslib-posix.c2
-rw-r--r--util/oslib-win32.c12
67 files changed, 1033 insertions, 628 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index fa325bb3d8..e0df9b6a1d 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -154,14 +154,13 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
* TCG is not considered a security-sensitive part of QEMU so this does not
* affect the impact of CFI in environment with high security requirements
*/
-QEMU_DISABLE_CFI
-static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
+static inline TranslationBlock * QEMU_DISABLE_CFI
+cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
CPUArchState *env = cpu->env_ptr;
uintptr_t ret;
TranslationBlock *last_tb;
- int tb_exit;
- uint8_t *tb_ptr = itb->tc.ptr;
+ const void *tb_ptr = itb->tc.ptr;
qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
"Trace %d: %p ["
@@ -188,11 +187,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
ret = tcg_qemu_tb_exec(env, tb_ptr);
cpu->can_do_io = 1;
- last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
- tb_exit = ret & TB_EXIT_MASK;
- trace_exec_tb_exit(last_tb, tb_exit);
+ /*
+ * TODO: Delay swapping back to the read-write region of the TB
+ * until we actually need to modify the TB. The read-only copy,
+ * coming from the rx region, shares the same host TLB entry as
+ * the code that executed the exit_tb opcode that arrived here.
+ * If we insist on touching both the RX and the RW pages, we
+ * double the host TLB pressure.
+ */
+ last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
+ *tb_exit = ret & TB_EXIT_MASK;
+
+ trace_exec_tb_exit(last_tb, *tb_exit);
- if (tb_exit > TB_EXIT_IDX1) {
+ if (*tb_exit > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
* of the start of the TB.
@@ -210,7 +218,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
cc->set_pc(cpu, last_tb->pc);
}
}
- return ret;
+ return last_tb;
}
#ifndef CONFIG_USER_ONLY
@@ -221,6 +229,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
{
TranslationBlock *tb;
uint32_t cflags = curr_cflags() | CF_NOCACHE;
+ int tb_exit;
if (ignore_icount) {
cflags &= ~CF_USE_ICOUNT;
@@ -238,7 +247,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
- cpu_tb_exec(cpu, tb);
+ cpu_tb_exec(cpu, tb, &tb_exit);
mmap_lock();
tb_phys_invalidate(tb, -1);
@@ -272,6 +281,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
uint32_t flags;
uint32_t cflags = 1;
uint32_t cf_mask = cflags & CF_HASH_MASK;
+ int tb_exit;
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
start_exclusive();
@@ -288,7 +298,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb(tb, pc);
- cpu_tb_exec(cpu, tb);
+ cpu_tb_exec(cpu, tb, &tb_exit);
cpu_exec_exit(cpu);
} else {
/*
@@ -382,7 +392,9 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
if (TCG_TARGET_HAS_direct_jump) {
uintptr_t offset = tb->jmp_target_arg[n];
uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
- tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr);
+ uintptr_t jmp_rx = tc_ptr + offset;
+ uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
+ tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr);
} else {
tb->jmp_target_arg[n] = addr;
}
@@ -682,13 +694,10 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
TranslationBlock **last_tb, int *tb_exit)
{
- uintptr_t ret;
int32_t insns_left;
trace_exec_tb(tb, tb->pc);
- ret = cpu_tb_exec(cpu, tb);
- tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
- *tb_exit = ret & TB_EXIT_MASK;
+ tb = cpu_tb_exec(cpu, tb, tb_exit);
if (*tb_exit != TB_EXIT_REQUESTED) {
*last_tb = tb;
return;
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 1ac0b76515..2eea8c32ee 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -38,6 +38,7 @@ struct TCGState {
AccelState parent_obj;
bool mttcg_enabled;
+ int splitwx_enabled;
unsigned long tb_size;
};
typedef struct TCGState TCGState;
@@ -94,6 +95,13 @@ static void tcg_accel_instance_init(Object *obj)
TCGState *s = TCG_STATE(obj);
s->mttcg_enabled = default_mttcg_enabled();
+
+ /* If debugging enabled, default "auto on", otherwise off. */
+#ifdef CONFIG_DEBUG_TCG
+ s->splitwx_enabled = -1;
+#else
+ s->splitwx_enabled = 0;
+#endif
}
bool mttcg_enabled;
@@ -102,7 +110,7 @@ static int tcg_init(MachineState *ms)
{
TCGState *s = TCG_STATE(current_accel());
- tcg_exec_init(s->tb_size * 1024 * 1024);
+ tcg_exec_init(s->tb_size * 1024 * 1024, s->splitwx_enabled);
mttcg_enabled = s->mttcg_enabled;
/*
@@ -179,6 +187,18 @@ static void tcg_set_tb_size(Object *obj, Visitor *v,
s->tb_size = value;
}
+static bool tcg_get_splitwx(Object *obj, Error **errp)
+{
+ TCGState *s = TCG_STATE(obj);
+ return s->splitwx_enabled;
+}
+
+static void tcg_set_splitwx(Object *obj, bool value, Error **errp)
+{
+ TCGState *s = TCG_STATE(obj);
+ s->splitwx_enabled = value;
+}
+
static void tcg_accel_class_init(ObjectClass *oc, void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
@@ -196,6 +216,10 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "tb-size",
"TCG translation block cache size");
+ object_class_property_add_bool(oc, "split-wx",
+ tcg_get_splitwx, tcg_set_splitwx);
+ object_class_property_set_description(oc, "split-wx",
+ "Map jit pages into separate RW and RX regions");
}
static const TypeInfo tcg_accel_type = {
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 446465a09a..d736f4ff55 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -145,7 +145,7 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg)
return ctpop64(arg);
}
-void *HELPER(lookup_tb_ptr)(CPUArchState *env)
+const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
CPUState *cpu = env_cpu(env);
TranslationBlock *tb;
@@ -154,7 +154,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env)
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
if (tb == NULL) {
- return tcg_ctx->code_gen_epilogue;
+ return tcg_code_gen_epilogue;
}
qemu_log_mask_and_addr(CPU_LOG_EXEC, pc,
"Chain %d: %p ["
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 2e36d6eb0c..91a5b7e85f 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -24,7 +24,7 @@ DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
+DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, cptr, env)
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events
index 385b9f749b..6eefb37f5d 100644
--- a/accel/tcg/trace-events
+++ b/accel/tcg/trace-events
@@ -7,4 +7,4 @@ exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=0x%x"
# translate-all.c
-translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
+translate_block(void *tb, uintptr_t pc, const void *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index a1803a1026..e9de6ff9dd 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -59,6 +59,7 @@
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
+#include "qapi/error.h"
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
@@ -269,9 +270,9 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
/* Decode a signed leb128 sequence at *PP; increment *PP past the
decoded value. Return the decoded value. */
-static target_long decode_sleb128(uint8_t **pp)
+static target_long decode_sleb128(const uint8_t **pp)
{
- uint8_t *p = *pp;
+ const uint8_t *p = *pp;
target_long val = 0;
int byte, shift = 0;
@@ -342,7 +343,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
CPUArchState *env = cpu->env_ptr;
- uint8_t *p = tb->tc.ptr + tb->tc.size;
+ const uint8_t *p = tb->tc.ptr + tb->tc.size;
int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
TCGProfile *prof = &tcg_ctx->prof;
@@ -392,27 +393,18 @@ void tb_destroy(TranslationBlock *tb)
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
{
- TranslationBlock *tb;
- bool r = false;
- uintptr_t check_offset;
-
- /* The host_pc has to be in the region of current code buffer. If
- * it is not we will not be able to resolve it here. The two cases
- * where host_pc will not be correct are:
+ /*
+ * The host_pc has to be in the rx region of the code buffer.
+ * If it is not we will not be able to resolve it here.
+ * The two cases where host_pc will not be correct are:
*
* - fault during translation (instruction fetch)
* - fault from helper (not using GETPC() macro)
*
* Either way we need return early as we can't resolve it here.
- *
- * We are using unsigned arithmetic so if host_pc <
- * tcg_init_ctx.code_gen_buffer check_offset will wrap to way
- * above the code_gen_buffer_size
*/
- check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer;
-
- if (check_offset < tcg_init_ctx.code_gen_buffer_size) {
- tb = tcg_tb_lookup(host_pc);
+ if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
+ TranslationBlock *tb = tcg_tb_lookup(host_pc);
if (tb) {
cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
if (tb_cflags(tb) & CF_NOCACHE) {
@@ -421,11 +413,10 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
tcg_tb_remove(tb);
tb_destroy(tb);
}
- r = true;
+ return true;
}
}
-
- return r;
+ return false;
}
static void page_init(void)
@@ -973,7 +964,7 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
-static inline size_t size_code_gen_buffer(size_t tb_size)
+static size_t size_code_gen_buffer(size_t tb_size)
{
/* Size the buffer. */
if (tb_size == 0) {
@@ -1024,22 +1015,27 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
__attribute__((aligned(CODE_GEN_ALIGN)));
-static inline void *alloc_code_gen_buffer(void)
+static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
- void *buf = static_code_gen_buffer;
- void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
+ void *buf, *end;
size_t size;
+ if (splitwx > 0) {
+ error_setg(errp, "jit split-wx not supported");
+ return false;
+ }
+
/* page-align the beginning and end of the buffer */
+ buf = static_code_gen_buffer;
+ end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
size = end - buf;
/* Honor a command-line option limiting the size of the buffer. */
- if (size > tcg_ctx->code_gen_buffer_size) {
- size = QEMU_ALIGN_DOWN(tcg_ctx->code_gen_buffer_size,
- qemu_real_host_page_size);
+ if (size > tb_size) {
+ size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
}
tcg_ctx->code_gen_buffer_size = size;
@@ -1051,31 +1047,49 @@ static inline void *alloc_code_gen_buffer(void)
#endif
if (qemu_mprotect_rwx(buf, size)) {
- abort();
+ error_setg_errno(errp, errno, "mprotect of jit buffer");
+ return false;
}
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
- return buf;
+ tcg_ctx->code_gen_buffer = buf;
+ return true;
}
#elif defined(_WIN32)
-static inline void *alloc_code_gen_buffer(void)
+static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
- size_t size = tcg_ctx->code_gen_buffer_size;
- return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
- PAGE_EXECUTE_READWRITE);
+ void *buf;
+
+ if (splitwx > 0) {
+ error_setg(errp, "jit split-wx not supported");
+ return false;
+ }
+
+ buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
+ PAGE_EXECUTE_READWRITE);
+ if (buf == NULL) {
+ error_setg_win32(errp, GetLastError(),
+ "allocate %zu bytes for jit buffer", size);
+ return false;
+ }
+
+ tcg_ctx->code_gen_buffer = buf;
+ tcg_ctx->code_gen_buffer_size = size;
+ return true;
}
#else
-static inline void *alloc_code_gen_buffer(void)
+static bool alloc_code_gen_buffer_anon(size_t size, int prot,
+ int flags, Error **errp)
{
- int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
- int flags = MAP_PRIVATE | MAP_ANONYMOUS;
- size_t size = tcg_ctx->code_gen_buffer_size;
void *buf;
buf = mmap(NULL, size, prot, flags, -1, 0);
if (buf == MAP_FAILED) {
- return NULL;
+ error_setg_errno(errp, errno,
+ "allocate %zu bytes for jit buffer", size);
+ return false;
}
+ tcg_ctx->code_gen_buffer_size = size;
#ifdef __mips__
if (cross_256mb(buf, size)) {
@@ -1114,19 +1128,183 @@ static inline void *alloc_code_gen_buffer(void)
/* Request large pages for the buffer. */
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
- return buf;
+ tcg_ctx->code_gen_buffer = buf;
+ return true;
}
-#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
-static inline void code_gen_alloc(size_t tb_size)
+#ifndef CONFIG_TCG_INTERPRETER
+#ifdef CONFIG_POSIX
+#include "qemu/memfd.h"
+
+static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
- tcg_ctx->code_gen_buffer_size = size_code_gen_buffer(tb_size);
- tcg_ctx->code_gen_buffer = alloc_code_gen_buffer();
- if (tcg_ctx->code_gen_buffer == NULL) {
- fprintf(stderr, "Could not allocate dynamic translator buffer\n");
- exit(1);
+ void *buf_rw = NULL, *buf_rx = MAP_FAILED;
+ int fd = -1;
+
+#ifdef __mips__
+ /* Find space for the RX mapping, vs the 256MiB regions. */
+ if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS |
+ MAP_NORESERVE, errp)) {
+ return false;
+ }
+ /* The size of the mapping may have been adjusted. */
+ size = tcg_ctx->code_gen_buffer_size;
+ buf_rx = tcg_ctx->code_gen_buffer;
+#endif
+
+ buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
+ if (buf_rw == NULL) {
+ goto fail;
+ }
+
+#ifdef __mips__
+ void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (tmp != buf_rx) {
+ goto fail_rx;
+ }
+#else
+ buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ if (buf_rx == MAP_FAILED) {
+ goto fail_rx;
+ }
+#endif
+
+ close(fd);
+ tcg_ctx->code_gen_buffer = buf_rw;
+ tcg_ctx->code_gen_buffer_size = size;
+ tcg_splitwx_diff = buf_rx - buf_rw;
+
+ /* Request large pages for the buffer and the splitwx. */
+ qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
+ qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
+ return true;
+
+ fail_rx:
+ error_setg_errno(errp, errno, "failed to map shared memory for execute");
+ fail:
+ if (buf_rx != MAP_FAILED) {
+ munmap(buf_rx, size);
}
+ if (buf_rw) {
+ munmap(buf_rw, size);
+ }
+ if (fd >= 0) {
+ close(fd);
+ }
+ return false;
}
+#endif /* CONFIG_POSIX */
+
+#ifdef CONFIG_DARWIN
+#include <mach/mach.h>
+
+extern kern_return_t mach_vm_remap(vm_map_t target_task,
+ mach_vm_address_t *target_address,
+ mach_vm_size_t size,
+ mach_vm_offset_t mask,
+ int flags,
+ vm_map_t src_task,
+ mach_vm_address_t src_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection,
+ vm_prot_t *max_protection,
+ vm_inherit_t inheritance);
+
+static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
+{
+ kern_return_t ret;
+ mach_vm_address_t buf_rw, buf_rx;
+ vm_prot_t cur_prot, max_prot;
+
+ /* Map the read-write portion via normal anon memory. */
+ if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
+ return false;
+ }
+
+ buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
+ buf_rx = 0;
+ ret = mach_vm_remap(mach_task_self(),
+ &buf_rx,
+ size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ mach_task_self(),
+ buf_rw,
+ false,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_NONE);
+ if (ret != KERN_SUCCESS) {
+ /* TODO: Convert "ret" to a human readable error message. */
+ error_setg(errp, "vm_remap for jit splitwx failed");
+ munmap((void *)buf_rw, size);
+ return false;
+ }
+
+ if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+ error_setg_errno(errp, errno, "mprotect for jit splitwx");
+ munmap((void *)buf_rx, size);
+ munmap((void *)buf_rw, size);
+ return false;
+ }
+
+ tcg_splitwx_diff = buf_rx - buf_rw;
+ return true;
+}
+#endif /* CONFIG_DARWIN */
+#endif /* CONFIG_TCG_INTERPRETER */
+
+static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
+{
+#ifndef CONFIG_TCG_INTERPRETER
+# ifdef CONFIG_DARWIN
+ return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
+# endif
+# ifdef CONFIG_POSIX
+ return alloc_code_gen_buffer_splitwx_memfd(size, errp);
+# endif
+#endif
+ error_setg(errp, "jit split-wx not supported");
+ return false;
+}
+
+static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+{
+ ERRP_GUARD();
+ int prot, flags;
+
+ if (splitwx) {
+ if (alloc_code_gen_buffer_splitwx(size, errp)) {
+ return true;
+ }
+ /*
+ * If splitwx force-on (1), fail;
+ * if splitwx default-on (-1), fall through to splitwx off.
+ */
+ if (splitwx > 0) {
+ return false;
+ }
+ error_free_or_abort(errp);
+ }
+
+ prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#ifdef CONFIG_TCG_INTERPRETER
+ /* The tcg interpreter does not need execute permission. */
+ prot = PROT_READ | PROT_WRITE;
+#elif defined(CONFIG_DARWIN)
+ /* Applicable to both iOS and macOS (Apple Silicon). */
+ if (!splitwx) {
+ flags |= MAP_JIT;
+ }
+#endif
+
+ return alloc_code_gen_buffer_anon(size, prot, flags, errp);
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
static bool tb_cmp(const void *ap, const void *bp)
{
@@ -1152,13 +1330,19 @@ static void tb_htable_init(void)
/* Must be called before using the QEMU cpus. 'tb_size' is the size
(in bytes) allocated to the translation buffer. Zero means default
size. */
-void tcg_exec_init(unsigned long tb_size)
+void tcg_exec_init(unsigned long tb_size, int splitwx)
{
+ bool ok;
+
tcg_allowed = true;
cpu_gen_init();
page_init();
tb_htable_init();
- code_gen_alloc(tb_size);
+
+ ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
+ splitwx, &error_fatal);
+ assert(ok);
+
#if defined(CONFIG_SOFTMMU)
/* There's no guest base to take into account, so go ahead and
initialize the prologue now. */
@@ -1722,7 +1906,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
gen_code_buf = tcg_ctx->code_gen_ptr;
- tb->tc.ptr = gen_code_buf;
+ tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
tb->pc = pc;
tb->cs_base = cs_base;
tb->flags = flags;
@@ -1816,15 +2000,19 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
qemu_log_in_addr_range(tb->pc)) {
FILE *logfile = qemu_log_lock();
- int code_size, data_size = 0;
+ int code_size, data_size;
+ const tcg_target_ulong *rx_data_gen_ptr;
size_t chunk_start;
int insn = 0;
if (tcg_ctx->data_gen_ptr) {
- code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
+ rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
+ code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
data_size = gen_code_size - code_size;
} else {
+ rx_data_gen_ptr = 0;
code_size = gen_code_size;
+ data_size = 0;
}
/* Dump header and the first instruction */
@@ -1859,16 +2047,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if (data_size) {
int i;
qemu_log(" data: [size=%d]\n", data_size);
- for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
- if (sizeof(tcg_target_ulong) == 8) {
- qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
- (uintptr_t)tcg_ctx->data_gen_ptr + i,
- *(uint64_t *)(tcg_ctx->data_gen_ptr + i));
- } else {
- qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
- (uintptr_t)tcg_ctx->data_gen_ptr + i,
- *(uint32_t *)(tcg_ctx->data_gen_ptr + i));
- }
+ for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
+ qemu_log("0x%08" PRIxPTR ": .quad 0x%" TCG_PRIlx "\n",
+ (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
}
}
qemu_log("\n");
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index fb1e19c585..a49a794065 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -133,8 +133,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
}
/* The disas_log hook may use these values rather than recompute. */
- db->tb->size = db->pc_next - db->pc_first;
- db->tb->icount = db->num_insns;
+ tb->size = db->pc_next - db->pc_first;
+ tb->icount = db->num_insns;
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 9c700c6234..65163e1396 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -909,7 +909,7 @@ int main(int argc, char **argv)
}
/* init tcg before creating CPUs and to get qemu_host_page_size */
- tcg_exec_init(0);
+ tcg_exec_init(0, false);
cpu_type = parse_cpu_option(cpu_model);
cpu = cpu_create(cpu_type);
diff --git a/disas.c b/disas.c
index 7c18d7d2a7..a61f95b580 100644
--- a/disas.c
+++ b/disas.c
@@ -299,7 +299,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
}
/* Disassemble this for me please... (debugging). */
-void disas(FILE *out, void *code, unsigned long size)
+void disas(FILE *out, const void *code, unsigned long size)
{
uintptr_t pc;
int count;
diff --git a/disas/capstone.c b/disas/capstone.c
index 7462c0e305..20bc8f9669 100644
--- a/disas/capstone.c
+++ b/disas/capstone.c
@@ -229,7 +229,7 @@ bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
}
/* Disassemble SIZE bytes at CODE for the host. */
-bool cap_disas_host(disassemble_info *info, void *code, size_t size)
+bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
{
csh handle;
const uint8_t *cbuf;
diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
index 2164762b46..d1133a4e04 100644
--- a/include/disas/dis-asm.h
+++ b/include/disas/dis-asm.h
@@ -358,7 +358,7 @@ typedef struct disassemble_info {
(bfd_vma addr, struct disassemble_info * info);
/* These are for buffer_read_memory. */
- bfd_byte *buffer;
+ const bfd_byte *buffer;
bfd_vma buffer_vma;
int buffer_length;
@@ -462,7 +462,7 @@ int print_insn_rx(bfd_vma, disassemble_info *);
#ifdef CONFIG_CAPSTONE
bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size);
-bool cap_disas_host(disassemble_info *info, void *code, size_t size);
+bool cap_disas_host(disassemble_info *info, const void *code, size_t size);
bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count);
bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size);
#else
diff --git a/include/disas/disas.h b/include/disas/disas.h
index 36c33f6f19..d363e95ede 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -7,7 +7,7 @@
#include "cpu.h"
/* Disassemble this for me please... (debugging). */
-void disas(FILE *out, void *code, unsigned long size);
+void disas(FILE *out, const void *code, unsigned long size);
void target_disas(FILE *out, CPUState *cpu, target_ulong code,
target_ulong size);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index fab573da06..2e5b4bba48 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -448,7 +448,7 @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
* Note: the address of search data can be obtained by adding @size to @ptr.
*/
struct tb_tc {
- void *ptr; /* pointer to the translated code */
+ const void *ptr; /* pointer to the translated code */
size_t size;
};
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 822c43cfd3..aa4b44354a 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -32,7 +32,7 @@ static inline void gen_io_end(void)
tcg_temp_free_i32(tmp);
}
-static inline void gen_tb_start(TranslationBlock *tb)
+static inline void gen_tb_start(const TranslationBlock *tb)
{
TCGv_i32 count, imm;
@@ -71,7 +71,7 @@ static inline void gen_tb_start(TranslationBlock *tb)
tcg_temp_free_i32(count);
}
-static inline void gen_tb_end(TranslationBlock *tb, int num_insns)
+static inline void gen_tb_end(const TranslationBlock *tb, int num_insns)
{
if (tb_cflags(tb) & CF_USE_ICOUNT) {
/* Update the num_insn immediate parameter now that we know
diff --git a/include/exec/log.h b/include/exec/log.h
index e02fff5de1..3c7fa65ead 100644
--- a/include/exec/log.h
+++ b/include/exec/log.h
@@ -56,7 +56,7 @@ static inline void log_target_disas(CPUState *cpu, target_ulong start,
rcu_read_unlock();
}
-static inline void log_disas(void *code, unsigned long size)
+static inline void log_disas(const void *code, unsigned long size)
{
QemuLogFile *logfile;
rcu_read_lock();
diff --git a/include/exec/translator.h b/include/exec/translator.h
index 638e1529c5..24232ead41 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -67,7 +67,7 @@ typedef enum DisasJumpType {
* Architecture-agnostic disassembly context.
*/
typedef struct DisasContextBase {
- TranslationBlock *tb;
+ const TranslationBlock *tb;
target_ulong pc_first;
target_ulong pc_next;
DisasJumpType is_jmp;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8e7552910d..140fa32a5e 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -189,7 +189,8 @@ struct CPUClass {
void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list,
Error **errp);
void (*set_pc)(CPUState *cpu, vaddr value);
- void (*synchronize_from_tb)(CPUState *cpu, struct TranslationBlock *tb);
+ void (*synchronize_from_tb)(CPUState *cpu,
+ const struct TranslationBlock *tb);
bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h
index 58ae488491..ae20bcda73 100644
--- a/include/qemu/cacheflush.h
+++ b/include/qemu/cacheflush.h
@@ -8,16 +8,27 @@
#ifndef QEMU_CACHEFLUSH_H
#define QEMU_CACHEFLUSH_H
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync. The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
/* icache is coherent and does not require flushing. */
}
#else
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
#endif
diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h
index d9d3ca8559..00349fb18a 100644
--- a/include/sysemu/tcg.h
+++ b/include/sysemu/tcg.h
@@ -8,7 +8,8 @@
#ifndef SYSEMU_TCG_H
#define SYSEMU_TCG_H
-void tcg_exec_init(unsigned long tb_size);
+void tcg_exec_init(unsigned long tb_size, int splitwx);
+
#ifdef CONFIG_TCG
extern bool tcg_allowed;
#define tcg_enabled() (tcg_allowed)
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index 5b3bdacc39..901b19f32a 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -805,7 +805,7 @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
* be NULL and @idx should be 0. Otherwise, @tb should be valid and
* @idx should be one of the TB_EXIT_ values.
*/
-void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx);
+void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx);
/**
* tcg_gen_goto_tb() - output goto_tb TCG operation
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 67092e82c6..70a76646c4 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -211,6 +211,11 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+/* Only used by i386 to cope with stupid register constraints. */
+DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
+ IMPL(TCG_TARGET_HAS_qemu_st8_i32))
+
/* Host vector support. */
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 8ff9dad4ef..95fe5604eb 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -261,7 +261,7 @@ struct TCGLabel {
unsigned refs : 16;
union {
uintptr_t value;
- tcg_insn_unit *value_ptr;
+ const tcg_insn_unit *value_ptr;
} u;
QSIMPLEQ_HEAD(, TCGRelocation) relocs;
QSIMPLEQ_ENTRY(TCGLabel) next;
@@ -621,8 +621,6 @@ struct TCGContext {
here, because there's too much arithmetic throughout that relies
on addition and subtraction working on bytes. Rely on the GCC
extension that allows arithmetic on void*. */
- void *code_gen_prologue;
- void *code_gen_epilogue;
void *code_gen_buffer;
size_t code_gen_buffer_size;
void *code_gen_ptr;
@@ -679,8 +677,36 @@ struct TCGContext {
extern TCGContext tcg_init_ctx;
extern __thread TCGContext *tcg_ctx;
+extern const void *tcg_code_gen_epilogue;
+extern uintptr_t tcg_splitwx_diff;
extern TCGv_env cpu_env;
+static inline bool in_code_gen_buffer(const void *p)
+{
+ const TCGContext *s = &tcg_init_ctx;
+ /*
+ * Much like it is valid to have a pointer to the byte past the
+ * end of an array (so long as you don't dereference it), allow
+ * a pointer to the byte past the end of the code gen buffer.
+ */
+ return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+const void *tcg_splitwx_to_rx(void *rw);
+void *tcg_splitwx_to_rw(const void *rx);
+#else
+static inline const void *tcg_splitwx_to_rx(void *rw)
+{
+ return rw ? rw + tcg_splitwx_diff : NULL;
+}
+
+static inline void *tcg_splitwx_to_rw(const void *rx)
+{
+ return rx ? (void *)rx - tcg_splitwx_diff : NULL;
+}
+#endif
+
static inline size_t temp_idx(TCGTemp *ts)
{
ptrdiff_t n = ts - tcg_ctx->temps;
@@ -1101,7 +1127,7 @@ static inline TCGLabel *arg_label(TCGArg i)
* correct result.
*/
-static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b)
+static inline ptrdiff_t tcg_ptr_byte_diff(const void *a, const void *b)
{
return a - b;
}
@@ -1115,9 +1141,22 @@ static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b)
* to the destination address.
*/
-static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target)
+static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, const void *target)
+{
+ return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_ptr));
+}
+
+/**
+ * tcg_tbrel_diff
+ * @s: the tcg context
+ * @target: address of the target
+ *
+ * Produce a difference, from the beginning of the current TB code
+ * to the destination address.
+ */
+static inline ptrdiff_t tcg_tbrel_diff(TCGContext *s, const void *target)
{
- return tcg_ptr_byte_diff(target, s->code_ptr);
+ return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_buf));
}
/**
@@ -1222,14 +1261,14 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi)
#define TB_EXIT_IDXMAX 1
#define TB_EXIT_REQUESTED 3
-#ifdef HAVE_TCG_QEMU_TB_EXEC
-uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
+#ifdef CONFIG_TCG_INTERPRETER
+uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *tb_ptr);
#else
-# define tcg_qemu_tb_exec(env, tb_ptr) \
- ((uintptr_t (*)(void *, void *))tcg_ctx->code_gen_prologue)(env, tb_ptr)
+typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr);
+extern tcg_prologue_fn *tcg_qemu_tb_exec;
#endif
-void tcg_register_jit(void *buf, size_t buf_size);
+void tcg_register_jit(const void *buf, size_t buf_size);
#if TCG_TARGET_MAYBE_vec
/* Return zero if the tuple (opc, type, vece) is unsupportable;
diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index 661b5daa9f..7193c3b226 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -748,8 +748,10 @@
IOCTL(TUNSETQUEUE, IOC_W, MK_PTR(MK_STRUCT(STRUCT_short_ifreq)))
IOCTL(TUNSETIFINDEX , IOC_W, MK_PTR(TYPE_INT))
/* TUNGETFILTER is not supported: see TUNATTACHFILTER. */
+#ifdef TUNSETVNETLE
IOCTL(TUNSETVNETLE, IOC_W, MK_PTR(TYPE_INT))
IOCTL(TUNGETVNETLE, IOC_R, MK_PTR(TYPE_INT))
+#endif
#ifdef TUNSETVNETBE
IOCTL(TUNSETVNETBE, IOC_W, MK_PTR(TYPE_INT))
IOCTL(TUNGETVNETBE, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/main.c b/linux-user/main.c
index 750a01118f..bb4e55e8fc 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -701,7 +701,7 @@ int main(int argc, char **argv, char **envp)
cpu_type = parse_cpu_option(cpu_model);
/* init tcg before creating CPUs and to get qemu_host_page_size */
- tcg_exec_init(0);
+ tcg_exec_init(0, false);
cpu = cpu_create(cpu_type);
env = cpu->env_ptr;
diff --git a/qemu-options.hx b/qemu-options.hx
index 459c916d3d..1698a0c751 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -115,6 +115,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
" igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n"
" kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n"
" kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
+ " split-wx=on|off (enable TCG split w^x mapping)\n"
" tb-size=n (TCG translation block cache size)\n"
" thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
SRST
@@ -140,6 +141,12 @@ SRST
``kvm-shadow-mem=size``
Defines the size of the KVM shadow MMU.
+ ``split-wx=on|off``
+ Controls the use of split w^x mapping for the TCG code generation
+ buffer. Some operating systems require this to be enabled, and in
+ such a case this will default on. On other operating systems, this
+ will default off, but one may enable this for testing or debugging.
+
``tb-size=n``
Controls the size (in MiB) of the TCG translation block cache.
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 8b9ffc41c2..6301f4f0a5 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2946,7 +2946,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
invalidate_and_set_dirty(mr, addr1, l);
break;
case FLUSH_CACHE:
- flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+ flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
break;
}
}
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d6188f6566..62e319eb6a 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -54,7 +54,8 @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
}
}
-static void arm_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void arm_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2e3fdfdf6b..ef63edfc68 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -410,7 +410,7 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
- TranslationBlock *tb;
+ const TranslationBlock *tb;
tb = s->base.tb;
if (use_goto_tb(s, n, dest)) {
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 5d9c4ad5bf..6f3d5a9e4a 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -41,7 +41,8 @@ static bool avr_cpu_has_work(CPUState *cs)
&& cpu_interrupts_enabled(env);
}
-static void avr_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void avr_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
AVRCPU *cpu = AVR_CPU(cs);
CPUAVRState *env = &cpu->env;
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 71b6aca45d..e28f047d10 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -35,7 +35,8 @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
cpu->env.iaoq_b = value + 4;
}
-static void hppa_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void hppa_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
HPPACPU *cpu = HPPA_CPU(cs);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 628dd29fe7..4fa013720e 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -49,7 +49,8 @@ static void x86_cpu_exec_exit(CPUState *cs)
env->eflags = cpu_compute_eflags(env);
}
-static void x86_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void x86_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
X86CPU *cpu = X86_CPU(cs);
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 9b2482159d..c8e754cfb1 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -83,7 +83,8 @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
cpu->env.iflags = 0;
}
-static void mb_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void mb_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 2283214c87..b2864d67d7 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -47,7 +47,8 @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
}
}
-static void mips_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void mips_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
MIPSCPU *cpu = MIPS_CPU(cs);
CPUMIPSState *env = &cpu->env;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 254cd83f8b..8227d7aea9 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -314,7 +314,8 @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
env->pc = value;
}
-static void riscv_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void riscv_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 23ee17a701..2bb14144a7 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -33,7 +33,8 @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
cpu->env.pc = value;
}
-static void rx_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void rx_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
RXCPU *cpu = RX_CPU(cs);
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 3c68021c56..1e0f05a15b 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -34,7 +34,8 @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
cpu->env.pc = value;
}
-static void superh_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void superh_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
SuperHCPU *cpu = SUPERH_CPU(cs);
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index d8a8bb1dda..6f14e370ed 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -691,7 +691,8 @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
cpu->env.npc = value + 4;
}
-static void sparc_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb)
+static void sparc_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
{
SPARCCPU *cpu = SPARC_CPU(cs);
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index 2f2e5b029f..4bff1d4718 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -42,7 +42,7 @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
}
static void tricore_cpu_synchronize_from_tb(CPUState *cs,
- TranslationBlock *tb)
+ const TranslationBlock *tb)
{
TriCoreCPU *cpu = TRICORE_CPU(cs);
CPUTriCoreState *env = &cpu->env;
diff --git a/tcg/README b/tcg/README
index 2f051e5c97..0cf9e2727c 100644
--- a/tcg/README
+++ b/tcg/README
@@ -502,6 +502,7 @@ goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* qemu_ld_i32/i64 t0, t1, flags, memidx
* qemu_st_i32/i64 t0, t1, flags, memidx
+* qemu_st8_i32 t0, t1, flags, memidx
Load data at the guest address t1 into t0, or store data in t0 at guest
address t1. The _i32/_i64 size applies to the size of the input/output
@@ -518,6 +519,10 @@ of the memory access.
For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in flags.
+For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
+the memory operation is known to be 8-bit. This allows the backend to
+provide a different set of register constraints.
+
********* Host vector operations
All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 26f71cb599..ab199b143f 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -78,38 +78,42 @@ static const int tcg_target_call_oarg_regs[1] = {
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
-static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t offset = target - code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = target - src_rx;
+
if (offset == sextract64(offset, 0, 26)) {
/* read instruction, mask away previous PC_REL26 parameter contents,
set the proper offset, then write back the instruction. */
- *code_ptr = deposit32(*code_ptr, 0, 26, offset);
+ *src_rw = deposit32(*src_rw, 0, 26, offset);
return true;
}
return false;
}
-static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t offset = target - code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = target - src_rx;
+
if (offset == sextract64(offset, 0, 19)) {
- *code_ptr = deposit32(*code_ptr, 5, 19, offset);
+ *src_rw = deposit32(*src_rw, 5, 19, offset);
return true;
}
return false;
}
-static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
- intptr_t value, intptr_t addend)
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
{
tcg_debug_assert(addend == 0);
switch (type) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
- return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
+ return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
case R_AARCH64_CONDBR19:
- return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
+ return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
default:
g_assert_not_reached();
}
@@ -1050,12 +1054,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
/* Look for host pointer values within 4G of the PC. This happens
often when loading pointers to QEMU's own data structures. */
if (type == TCG_TYPE_I64) {
- tcg_target_long disp = value - (intptr_t)s->code_ptr;
+ intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
+ tcg_target_long disp = value - src_rx;
if (disp == sextract64(disp, 0, 21)) {
tcg_out_insn(s, 3406, ADR, rd, disp);
return;
}
- disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+ disp = (value >> 12) - (src_rx >> 12);
if (disp == sextract64(disp, 0, 21)) {
tcg_out_insn(s, 3406, ADRP, rd, disp);
if (value & 0xfff) {
@@ -1306,18 +1311,18 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
}
}
-static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
+static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
- ptrdiff_t offset = target - s->code_ptr;
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
tcg_debug_assert(offset == sextract64(offset, 0, 26));
tcg_out_insn(s, 3206, B, offset);
}
-static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
+static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
- ptrdiff_t offset = target - s->code_ptr;
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
if (offset == sextract64(offset, 0, 26)) {
- tcg_out_insn(s, 3206, BL, offset);
+ tcg_out_insn(s, 3206, B, offset);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
@@ -1329,9 +1334,9 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
tcg_out_insn(s, 3207, BLR, reg);
}
-static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
- ptrdiff_t offset = target - s->code_ptr;
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, BL, offset);
} else {
@@ -1340,21 +1345,21 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
}
}
-void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
- uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
tcg_insn_unit i1, i2;
TCGType rt = TCG_TYPE_I64;
TCGReg rd = TCG_REG_TMP;
uint64_t pair;
- ptrdiff_t offset = addr - jmp_addr;
+ ptrdiff_t offset = addr - jmp_rx;
if (offset == sextract64(offset, 0, 26)) {
i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
i2 = NOP;
} else {
- offset = (addr >> 12) - (jmp_addr >> 12);
+ offset = (addr >> 12) - (jmp_rx >> 12);
/* patch ADRP */
i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
@@ -1362,8 +1367,8 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
}
pair = (uint64_t)i2 << 32 | i1;
- qatomic_set((uint64_t *)jmp_addr, pair);
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ qatomic_set((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
}
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
@@ -1393,7 +1398,7 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
offset = tcg_in32(s) >> 5;
} else {
- offset = l->u.value_ptr - s->code_ptr;
+ offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
tcg_debug_assert(offset == sextract64(offset, 0, 19));
}
@@ -1568,7 +1573,7 @@ static void * const qemu_st_helpers[16] = {
[MO_BEQ] = helper_be_stq_mmu,
};
-static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
+static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target);
tcg_debug_assert(offset == sextract64(offset, 0, 21));
@@ -1581,7 +1586,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
- if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1606,7 +1611,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
- if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1631,7 +1636,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
label->type = ext;
label->datalo_reg = data_reg;
label->addrlo_reg = addr_reg;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
@@ -1849,7 +1854,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
#endif /* CONFIG_SOFTMMU */
}
-static tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -1873,7 +1878,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_exit_tb:
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
- tcg_out_goto_long(s, s->code_gen_epilogue);
+ tcg_out_goto_long(s, tcg_code_gen_epilogue);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
tcg_out_goto_long(s, tb_ret_addr);
@@ -2894,11 +2899,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
* and fall through to the rest of the epilogue.
*/
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
/* TB epilogue */
- tb_ret_addr = s->code_ptr;
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
/* Remove TCG locals stack space. */
tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@@ -2964,7 +2969,7 @@ static const DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 8a6b97598e..5ec30dba25 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -88,6 +88,7 @@ typedef enum {
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
#define TCG_TARGET_HAS_goto_ptr 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1
@@ -148,7 +149,7 @@ typedef enum {
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 62c37a954b..0fd1126454 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -187,29 +187,32 @@ static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_GTU] = COND_HI,
};
-static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) >> 2;
+
if (offset == sextract32(offset, 0, 24)) {
- *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
+ *src_rw = deposit32(*src_rw, 0, 24, offset);
return true;
}
return false;
}
-static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
if (offset >= -0xfff && offset <= 0xfff) {
- tcg_insn_unit insn = *code_ptr;
+ tcg_insn_unit insn = *src_rw;
bool u = (offset >= 0);
if (!u) {
offset = -offset;
}
insn = deposit32(insn, 23, 1, u);
insn = deposit32(insn, 0, 12, offset);
- *code_ptr = insn;
+ *src_rw = insn;
return true;
}
return false;
@@ -221,9 +224,9 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
tcg_debug_assert(addend == 0);
if (type == R_ARM_PC24) {
- return reloc_pc24(code_ptr, (tcg_insn_unit *)value);
+ return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
} else if (type == R_ARM_PC13) {
- return reloc_pc13(code_ptr, (tcg_insn_unit *)value);
+ return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
} else {
g_assert_not_reached();
}
@@ -617,7 +620,7 @@ static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
/* Check for a pc-relative address. This will usually be the TB,
or within the TB, which is immediately before the code block. */
- diff = arg - ((intptr_t)s->code_ptr + 8);
+ diff = tcg_pcrel_diff(s, (void *)arg) - 8;
if (diff >= 0) {
rot = encode_imm(diff);
if (rot >= 0) {
@@ -1019,7 +1022,7 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
* with the code buffer limited to 16MB we wouldn't need the long case.
* But we also use it for the tail-call to the qemu_ld/st helpers, which does.
*/
-static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
+static void tcg_out_goto(TCGContext *s, int cond, const tcg_insn_unit *addr)
{
intptr_t addri = (intptr_t)addr;
ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@@ -1033,7 +1036,7 @@ static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
/* The call case is mostly used for helpers - so it's not unreasonable
* for them to be beyond branch range */
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
{
intptr_t addri = (intptr_t)addr;
ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@@ -1337,7 +1340,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
@@ -1348,7 +1351,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
void *func;
- if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1411,7 +1414,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
TCGMemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
- if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1762,8 +1765,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCGReg base = TCG_REG_PC;
tcg_debug_assert(s->tb_jmp_insn_offset == 0);
- ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
- dif = ptr - ((intptr_t)s->code_ptr + 8);
+ ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
+ dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
dil = sextract32(dif, 0, 12);
if (dif != dil) {
/* The TB is close, but outside the 12 bits addressable by
@@ -2297,7 +2300,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
* and fall through to the rest of the epilogue.
*/
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
tcg_out_epilogue(s);
}
@@ -2353,7 +2356,7 @@ static const DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f1955ce4ac..8d1fee6327 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -126,6 +126,7 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_rem_i32 0
#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
enum {
TCG_AREG0 = TCG_REG_R6,
@@ -135,7 +136,7 @@ enum {
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
/* not defined -- call should be eliminated at compile time */
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index d8797ed398..46e856f442 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -154,18 +154,17 @@ bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
+bool have_movbe;
#ifdef CONFIG_CPUID_H
-static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
-# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif
-static tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *tb_ret_addr;
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
@@ -173,7 +172,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
value += addend;
switch(type) {
case R_386_PC32:
- value -= (uintptr_t)code_ptr;
+ value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
if (value != (int32_t)value) {
return false;
}
@@ -182,7 +181,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
tcg_patch32(code_ptr, value);
break;
case R_386_PC8:
- value -= (uintptr_t)code_ptr;
+ value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
if (value != (int8_t)value) {
return false;
}
@@ -246,11 +245,21 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
ct->regs |= ALL_VECTOR_REGS;
break;
- /* qemu_ld/st address constraint */
case 'L':
+ /* qemu_ld/st data+address constraint */
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
+#ifdef CONFIG_SOFTMMU
tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
+#endif
+ break;
+ case 's':
+ /* qemu_st8_i32 data constraint */
+ ct->regs = 0xf;
+#ifdef CONFIG_SOFTMMU
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
+#endif
break;
case 'e':
@@ -1006,7 +1015,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
}
/* Try a 7 byte pc-relative lea before the 10 byte movq. */
- diff = arg - ((uintptr_t)s->code_ptr + 7);
+ diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
if (diff == (int32_t)diff) {
tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
@@ -1452,7 +1461,7 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
default:
tcg_abort();
}
- tcg_out_label(s, label_next, s->code_ptr);
+ tcg_out_label(s, label_next);
}
#endif
@@ -1494,10 +1503,10 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
tcg_out_jxx(s, JCC_JMP, label_over, 1);
- tcg_out_label(s, label_true, s->code_ptr);
+ tcg_out_label(s, label_true);
tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
- tcg_out_label(s, label_over, s->code_ptr);
+ tcg_out_label(s, label_over);
} else {
/* When the destination does not overlap one of the arguments,
clear the destination first, jump if cond false, and emit an
@@ -1511,7 +1520,7 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
tcg_out_brcond2(s, new_args, const_args+1, 1);
tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
- tcg_out_label(s, label_over, s->code_ptr);
+ tcg_out_label(s, label_over);
}
}
#endif
@@ -1525,7 +1534,7 @@ static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
TCGLabel *over = gen_new_label();
tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
- tcg_out_label(s, over, s->code_ptr);
+ tcg_out_label(s, over);
}
}
@@ -1591,7 +1600,7 @@ static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
}
}
-static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
+static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest)
{
intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
@@ -1610,12 +1619,12 @@ static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
}
}
-static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
{
tcg_out_branch(s, 1, dest);
}
-static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
+static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)
{
tcg_out_branch(s, 0, dest);
}
@@ -1786,7 +1795,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
label->label_ptr[1] = label_ptr[1];
@@ -1986,13 +1995,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
int seg, bool is64, MemOp memop)
{
- const MemOp real_bswap = memop & MO_BSWAP;
- MemOp bswap = real_bswap;
+ bool use_movbe = false;
int rexw = is64 * P_REXW;
int movop = OPC_MOVL_GvEv;
- if (have_movbe && real_bswap) {
- bswap = 0;
+ /* Do big-endian loads with movbe. */
+ if (memop & MO_BSWAP) {
+ tcg_debug_assert(have_movbe);
+ use_movbe = true;
movop = OPC_MOVBE_GyMy;
}
@@ -2006,23 +2016,28 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
base, index, 0, ofs);
break;
case MO_UW:
- tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
- base, index, 0, ofs);
- if (real_bswap) {
- tcg_out_rolw_8(s, datalo);
- }
- break;
- case MO_SW:
- if (real_bswap) {
- if (have_movbe) {
+ if (use_movbe) {
+ /* There is no extending movbe; only low 16-bits are modified. */
+ if (datalo != base && datalo != index) {
+ /* XOR breaks dependency chains. */
+ tgen_arithr(s, ARITH_XOR, datalo, datalo);
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
} else {
- tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
- base, index, 0, ofs);
- tcg_out_rolw_8(s, datalo);
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16u(s, datalo, datalo);
}
- tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+ base, index, 0, ofs);
+ }
+ break;
+ case MO_SW:
+ if (use_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16s(s, datalo, datalo, rexw);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
datalo, base, index, 0, ofs);
@@ -2030,18 +2045,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
break;
case MO_UL:
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
- if (bswap) {
- tcg_out_bswap32(s, datalo);
- }
break;
#if TCG_TARGET_REG_BITS == 64
case MO_SL:
- if (real_bswap) {
- tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ if (use_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
base, index, 0, ofs);
- if (bswap) {
- tcg_out_bswap32(s, datalo);
- }
tcg_out_ext32s(s, datalo, datalo);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
@@ -2053,12 +2062,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs);
- if (bswap) {
- tcg_out_bswap64(s, datalo);
- }
} else {
- if (real_bswap) {
- int t = datalo;
+ if (use_movbe) {
+ TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
@@ -2073,14 +2079,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_modrm_sib_offset(s, movop + seg, datalo,
base, index, 0, ofs);
}
- if (bswap) {
- tcg_out_bswap32(s, datalo);
- tcg_out_bswap32(s, datahi);
- }
}
break;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
@@ -2128,69 +2130,40 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
int seg, MemOp memop)
{
- /* ??? Ideally we wouldn't need a scratch register. For user-only,
- we could perform the bswap twice to restore the original value
- instead of moving to the scratch. But as it is, the L constraint
- means that TCG_REG_L0 is definitely free here. */
- const TCGReg scratch = TCG_REG_L0;
- const MemOp real_bswap = memop & MO_BSWAP;
- MemOp bswap = real_bswap;
+ bool use_movbe = false;
int movop = OPC_MOVL_EvGv;
- if (have_movbe && real_bswap) {
- bswap = 0;
+ /*
+ * Do big-endian stores with movbe or softmmu.
+ * User-only without movbe will have its swapping done generically.
+ */
+ if (memop & MO_BSWAP) {
+ tcg_debug_assert(have_movbe);
+ use_movbe = true;
movop = OPC_MOVBE_MyGy;
}
switch (memop & MO_SIZE) {
case MO_8:
- /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
- Use the scratch register if necessary. */
- if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
- tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
- datalo = scratch;
- }
+ /* This is handled with constraints on INDEX_op_qemu_st8_i32. */
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
datalo, base, index, 0, ofs);
break;
case MO_16:
- if (bswap) {
- tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
- tcg_out_rolw_8(s, scratch);
- datalo = scratch;
- }
tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
base, index, 0, ofs);
break;
case MO_32:
- if (bswap) {
- tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
- tcg_out_bswap32(s, scratch);
- datalo = scratch;
- }
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
break;
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
- if (bswap) {
- tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
- tcg_out_bswap64(s, scratch);
- datalo = scratch;
- }
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs);
- } else if (bswap) {
- tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
- tcg_out_bswap32(s, scratch);
- tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
- base, index, 0, ofs);
- tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
- tcg_out_bswap32(s, scratch);
- tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
- base, index, 0, ofs + 4);
} else {
- if (real_bswap) {
- int t = datalo;
+ if (use_movbe) {
+ TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
@@ -2201,7 +2174,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
break;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
@@ -2267,7 +2240,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_exit_tb:
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
- tcg_out_jmp(s, s->code_gen_epilogue);
+ tcg_out_jmp(s, tcg_code_gen_epilogue);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
tcg_out_jmp(s, tb_ret_addr);
@@ -2280,7 +2253,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* jump displacement must be aligned for atomic patching;
* see if we need to add extra nops before jump
*/
- gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
+ gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr;
if (gap != 1) {
tcg_out_nopn(s, gap - 1);
}
@@ -2520,6 +2493,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_ld(s, args, 1);
break;
case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
tcg_out_qemu_st(s, args, 0);
break;
case INDEX_op_qemu_st_i64:
@@ -2978,9 +2952,11 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
+ static const TCGTargetOpDef s_L = { .args_ct_str = { "s", "L" } };
static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
+ static const TCGTargetOpDef s_L_L = { .args_ct_str = { "s", "L", "L" } };
static const TCGTargetOpDef r_r_L_L
= { .args_ct_str = { "r", "r", "L", "L" } };
static const TCGTargetOpDef L_L_L_L
@@ -3174,6 +3150,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
case INDEX_op_qemu_st_i32:
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
+ case INDEX_op_qemu_st8_i32:
+ return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &s_L : &s_L_L;
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? &r_L
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
@@ -3825,11 +3803,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
* and fall through to the rest of the epilogue.
*/
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
/* TB epilogue */
- tb_ret_addr = s->code_ptr;
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
@@ -3998,7 +3976,7 @@ static const DebugFrame debug_frame = {
#endif
#if defined(ELF_HOST_MACHINE)
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index cd067e0b30..b693d3692d 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -101,6 +101,7 @@ extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
+extern bool have_movbe;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -171,6 +172,9 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+#else
+#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif
/* We do not support older SSE systems, only beginning with AVX1. */
@@ -206,11 +210,11 @@ extern bool have_avx2;
#define TCG_TARGET_extract_i64_valid(ofs, len) \
(((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
-static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
- uintptr_t jmp_addr, uintptr_t addr)
+static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
/* patch the branch destination */
- qatomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
+ qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
/* no need to flush icache explicitly */
}
@@ -225,7 +229,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 41be574e89..add157f6c3 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -139,33 +139,21 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
TCG_REG_V1
};
-static tcg_insn_unit *tb_ret_addr;
-static tcg_insn_unit *bswap32_addr;
-static tcg_insn_unit *bswap32u_addr;
-static tcg_insn_unit *bswap64_addr;
+static const tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *bswap32_addr;
+static const tcg_insn_unit *bswap32u_addr;
+static const tcg_insn_unit *bswap64_addr;
-static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+static bool reloc_pc16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
/* Let the compiler perform the right-shift as part of the arithmetic. */
- ptrdiff_t disp = target - (pc + 1);
- tcg_debug_assert(disp == (int16_t)disp);
- return disp & 0xffff;
-}
-
-static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target)
-{
- *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target));
-}
-
-static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target)
-{
- tcg_debug_assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0);
- return ((uintptr_t)target >> 2) & 0x3ffffff;
-}
-
-static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target)
-{
- *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target));
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = target - (src_rx + 1);
+ if (disp == (int16_t)disp) {
+ *src_rw = deposit32(*src_rw, 0, 16, disp);
+ return true;
+ }
+ return false;
}
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
@@ -173,8 +161,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
{
tcg_debug_assert(type == R_MIPS_PC16);
tcg_debug_assert(addend == 0);
- reloc_pc16(code_ptr, (tcg_insn_unit *)value);
- return true;
+ return reloc_pc16(code_ptr, (const tcg_insn_unit *)value);
}
#define TCG_CT_CONST_ZERO 0x100
@@ -516,10 +503,10 @@ static void tcg_out_opc_sa64(TCGContext *s, MIPSInsn opc1, MIPSInsn opc2,
* Type jump.
* Returns true if the branch was in range and the insn was emitted.
*/
-static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target)
+static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, const void *target)
{
uintptr_t dest = (uintptr_t)target;
- uintptr_t from = (uintptr_t)s->code_ptr + 4;
+ uintptr_t from = (uintptr_t)tcg_splitwx_to_rx(s->code_ptr) + 4;
int32_t inst;
/* The pc-region branch happens within the 256MB region of
@@ -631,7 +618,7 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
-static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub)
+static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub)
{
bool ok = tcg_out_opc_jmp(s, OPC_JAL, sub);
tcg_debug_assert(ok);
@@ -924,11 +911,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
}
tcg_out_opc_br(s, b_opc, arg1, arg2);
- if (l->has_value) {
- reloc_pc16(s->code_ptr - 1, l->u.value_ptr);
- } else {
- tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
- }
+ tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
tcg_out_nop(s);
}
@@ -1079,7 +1062,7 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
}
}
-static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
/* Note that the ABI requires the called function's address to be
loaded into T9, even if a direct branch is in range. */
@@ -1097,7 +1080,7 @@ static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
}
}
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
{
tcg_out_call_int(s, arg, false);
tcg_out_nop(s);
@@ -1300,7 +1283,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
label->label_ptr[1] = label_ptr[1];
@@ -1309,15 +1292,17 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
+ const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
TCGMemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
TCGReg v0;
int i;
/* resolve label address */
- reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- reloc_pc16(l->label_ptr[1], s->code_ptr);
+ if (!reloc_pc16(l->label_ptr[0], tgt_rx)
+ || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
+ && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
+ return false;
}
i = 1;
@@ -1345,7 +1330,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
- reloc_pc16(s->code_ptr - 1, l->raddr);
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
+ return false;
+ }
/* delay slot */
if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
@@ -1359,15 +1346,17 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
+ const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
TCGMemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
int i;
/* resolve label address */
- reloc_pc16(l->label_ptr[0], s->code_ptr);
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- reloc_pc16(l->label_ptr[1], s->code_ptr);
+ if (!reloc_pc16(l->label_ptr[0], tgt_rx)
+ || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
+ && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
+ return false;
}
i = 1;
@@ -2483,11 +2472,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
* and fall through to the rest of the epilogue.
*/
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
/* TB epilogue */
- tb_ret_addr = s->code_ptr;
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
@@ -2507,7 +2496,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/*
* bswap32 -- 32-bit swap (signed result for mips64). a0 = abcd.
*/
- bswap32_addr = align_code_ptr(s);
+ bswap32_addr = tcg_splitwx_to_rx(align_code_ptr(s));
/* t3 = (ssss)d000 */
tcg_out_opc_sa(s, OPC_SLL, TCG_TMP3, TCG_TMP0, 24);
/* t1 = 000a */
@@ -2535,7 +2524,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/*
* bswap32u -- unsigned 32-bit swap. a0 = ....abcd.
*/
- bswap32u_addr = align_code_ptr(s);
+ bswap32u_addr = tcg_splitwx_to_rx(align_code_ptr(s));
/* t1 = (0000)000d */
tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff);
/* t3 = 000a */
@@ -2561,7 +2550,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/*
* bswap64 -- 64-bit swap. a0 = abcdefgh
*/
- bswap64_addr = align_code_ptr(s);
+ bswap64_addr = tcg_splitwx_to_rx(align_code_ptr(s));
/* t3 = h0000000 */
tcg_out_dsll(s, TCG_TMP3, TCG_TMP0, 56);
/* t1 = 0000000a */
@@ -2656,11 +2645,11 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
}
-void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
- uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
- qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ qatomic_set((uint32_t *)jmp_rw, deposit32(OPC_J, 0, 26, addr >> 2));
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
}
typedef struct {
@@ -2702,7 +2691,7 @@ static const DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 92c1d63da3..c2c32fb38f 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -169,6 +169,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions
@@ -201,7 +202,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 7ca71de956..1fb42eb2a9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1541,6 +1541,7 @@ void tcg_optimize(TCGContext *s)
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_st_i64:
case INDEX_op_call:
/* Opcodes that touch guest memory stop the optimization. */
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 0d068ec8ab..19a4a12f15 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -62,8 +62,6 @@
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ 0x4000
-static tcg_insn_unit *tb_ret_addr;
-
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
@@ -184,35 +182,41 @@ static inline bool in_range_b(tcg_target_long target)
return target == sextract64(target, 0, 26);
}
-static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
+ const tcg_insn_unit *target)
{
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
tcg_debug_assert(in_range_b(disp));
return disp & 0x3fffffc;
}
-static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
+static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
if (in_range_b(disp)) {
- *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
+ *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
return true;
}
return false;
}
-static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
+ const tcg_insn_unit *target)
{
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
tcg_debug_assert(disp == (int16_t) disp);
return disp & 0xfffc;
}
-static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
+static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
if (disp == (int16_t) disp) {
- *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
+ *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
return true;
}
return false;
@@ -673,12 +677,12 @@ static const uint32_t tcg_to_isel[] = {
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
- tcg_insn_unit *target;
+ const tcg_insn_unit *target;
int16_t lo;
int32_t hi;
value += addend;
- target = (tcg_insn_unit *)value;
+ target = (const tcg_insn_unit *)value;
switch (type) {
case R_PPC_REL14:
@@ -837,7 +841,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
/* Load addresses within the TB with one insn. */
- tb_diff = arg - (intptr_t)s->code_gen_ptr;
+ tb_diff = tcg_tbrel_diff(s, (void *)arg);
if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
return;
@@ -890,7 +894,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
/* Use the constant pool, if possible. */
if (!in_prologue && USE_REG_TB) {
new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
return;
}
@@ -940,7 +944,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
*/
if (USE_REG_TB) {
rel = R_PPC_ADDR16;
- add = -(intptr_t)s->code_gen_ptr;
+ add = tcg_tbrel_diff(s, NULL);
} else {
rel = R_PPC_ADDR32;
add = 0;
@@ -1106,7 +1110,7 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
-static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
+static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
ptrdiff_t disp = tcg_pcrel_diff(s, target);
if (in_range_b(disp)) {
@@ -1544,7 +1548,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
if (l->has_value) {
- bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
+ bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
} else {
tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
}
@@ -1722,13 +1726,13 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
tcg_out32(s, insn);
}
-void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
- uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
if (TCG_TARGET_REG_BITS == 64) {
tcg_insn_unit i1, i2;
intptr_t tb_diff = addr - tc_ptr;
- intptr_t br_diff = addr - (jmp_addr + 4);
+ intptr_t br_diff = addr - (jmp_rx + 4);
uint64_t pair;
/* This does not exercise the range of the branch, but we do
@@ -1752,23 +1756,23 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
/* As per the enclosing if, this is ppc64. Avoid the _Static_assert
within qatomic_set that would fail to build a ppc32 host. */
- qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
} else {
- intptr_t diff = addr - jmp_addr;
+ intptr_t diff = addr - jmp_rx;
tcg_debug_assert(in_range_b(diff));
- qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
}
}
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
/* Look through the descriptor. If the branch is in range, and we
don't have to spend too much effort on building the toc. */
- void *tgt = ((void **)target)[0];
- uintptr_t toc = ((uintptr_t *)target)[1];
+ const void *tgt = ((const void * const *)target)[0];
+ uintptr_t toc = ((const uintptr_t *)target)[1];
intptr_t diff = tcg_pcrel_diff(s, tgt);
if (in_range_b(diff) && toc == (uint32_t)toc) {
@@ -1997,7 +2001,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
label->datahi_reg = datahi_reg;
label->addrlo_reg = addrlo_reg;
label->addrhi_reg = addrhi_reg;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = lptr;
}
@@ -2007,7 +2011,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
TCGReg hi, lo, arg = TCG_REG_R3;
- if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -2055,7 +2059,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp s_bits = opc & MO_SIZE;
TCGReg hi, lo, arg = TCG_REG_R3;
- if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
+ if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -2306,10 +2310,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
int i;
#ifdef _CALL_AIX
- void **desc = (void **)s->code_ptr;
- desc[0] = desc + 2; /* entry point */
- desc[1] = 0; /* environment pointer */
- s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
+ const void **desc = (const void **)s->code_ptr;
+ desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */
+ desc[1] = 0; /* environment pointer */
+ s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
#endif
tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
@@ -2341,7 +2345,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCCTR | BO_ALWAYS);
/* Epilogue */
- s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -2362,7 +2366,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
switch (opc) {
case INDEX_op_exit_tb:
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
- tcg_out_b(s, 0, tb_ret_addr);
+ tcg_out_b(s, 0, tcg_code_gen_epilogue);
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_insn_offset) {
@@ -2392,9 +2396,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
set_jmp_reset_offset(s, args[0]);
if (USE_REG_TB) {
/* For the unlinked case, need to reset TCG_REG_TB. */
- c = -tcg_current_code_size(s);
- assert(c == (int16_t)c);
- tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
+ tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
+ -tcg_current_code_size(s));
}
break;
case INDEX_op_goto_ptr:
@@ -2411,7 +2414,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
uint32_t insn = B;
if (l->has_value) {
- insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
+ insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
+ l->u.value_ptr);
} else {
tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
}
@@ -3847,7 +3851,7 @@ static DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
uint8_t *p = &debug_frame.fde_reg_ofs[3];
int i;
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index a509a19628..d1339afc66 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -108,6 +108,7 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i32 1
#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0
@@ -175,7 +176,7 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_bitsel_vec have_vsx
#define TCG_TARGET_HAS_cmpsel_vec 0
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 4089e29cd9..c60b91ba58 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -425,39 +425,44 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
* Relocations
*/
-static bool reloc_sbimm12(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
- if (offset == sextreg(offset, 1, 12) << 1) {
- code_ptr[0] |= encode_sbimm12(offset);
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 12)) {
+ *src_rw |= encode_sbimm12(offset);
return true;
}
return false;
}
-static bool reloc_jimm20(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
- if (offset == sextreg(offset, 1, 20) << 1) {
- code_ptr[0] |= encode_ujimm20(offset);
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 20)) {
+ *src_rw |= encode_ujimm20(offset);
return true;
}
return false;
}
-static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
- intptr_t offset = (intptr_t)target - (intptr_t)code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
int32_t lo = sextreg(offset, 0, 12);
int32_t hi = offset - lo;
if (offset == hi + lo) {
- code_ptr[0] |= encode_uimm20(hi);
- code_ptr[1] |= encode_imm12(lo);
+ src_rw[0] |= encode_uimm20(hi);
+ src_rw[1] |= encode_imm12(lo);
return true;
}
@@ -467,43 +472,16 @@ static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
- uint32_t insn = *code_ptr;
- intptr_t diff;
- bool short_jmp;
-
tcg_debug_assert(addend == 0);
-
switch (type) {
case R_RISCV_BRANCH:
- diff = value - (uintptr_t)code_ptr;
- short_jmp = diff == sextreg(diff, 0, 12);
- if (short_jmp) {
- return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
- } else {
- /* Invert the condition */
- insn = insn ^ (1 << 12);
- /* Clear the offset */
- insn &= 0x01fff07f;
- /* Set the offset to the PC + 8 */
- insn |= encode_sbimm12(8);
-
- /* Move forward */
- code_ptr[0] = insn;
-
- /* Overwrite the NOP with jal x0,value */
- diff = value - (uintptr_t)(code_ptr + 1);
- insn = encode_uj(OPC_JAL, TCG_REG_ZERO, diff);
- code_ptr[1] = insn;
-
- return true;
- }
- break;
+ return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
case R_RISCV_JAL:
return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
case R_RISCV_CALL:
return reloc_call(code_ptr, (tcg_insn_unit *)value);
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
@@ -557,7 +535,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
if (tmp == (int32_t)tmp) {
tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
- ret = reloc_call(s->code_ptr - 2, (tcg_insn_unit *)val);
+ ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
tcg_debug_assert(ret == true);
return;
}
@@ -777,21 +755,8 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
arg2 = t;
}
- if (l->has_value) {
- intptr_t diff = tcg_pcrel_diff(s, l->u.value_ptr);
- if (diff == sextreg(diff, 0, 12)) {
- tcg_out_opc_branch(s, op, arg1, arg2, diff);
- } else {
- /* Invert the conditional branch. */
- tcg_out_opc_branch(s, op ^ (1 << 12), arg1, arg2, 8);
- tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, diff - 4);
- }
- } else {
- tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
- tcg_out_opc_branch(s, op, arg1, arg2, 0);
- /* NOP to allow patching later */
- tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
- }
+ tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
+ tcg_out_opc_branch(s, op, arg1, arg2, 0);
}
static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
@@ -854,28 +819,21 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
g_assert_not_reached();
}
-static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
-{
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
- tcg_debug_assert(offset == sextreg(offset, 1, 20) << 1);
- tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
-}
-
-static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
ptrdiff_t offset = tcg_pcrel_diff(s, arg);
int ret;
- if (offset == sextreg(offset, 1, 20) << 1) {
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 20)) {
/* short jump: -2097150 to 2097152 */
tcg_out_opc_jump(s, OPC_JAL, link, offset);
- } else if (TCG_TARGET_REG_BITS == 32 ||
- offset == sextreg(offset, 1, 31) << 1) {
+ } else if (TCG_TARGET_REG_BITS == 32 || offset == (int32_t)offset) {
/* long jump: -2147483646 to 2147483648 */
tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
- ret = reloc_call(s->code_ptr - 2, arg);\
+ ret = reloc_call(s->code_ptr - 2, arg);
tcg_debug_assert(ret == true);
} else if (TCG_TARGET_REG_BITS == 64) {
/* far jump: 64-bit */
@@ -888,7 +846,7 @@ static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
}
}
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
{
tcg_out_call_int(s, arg, false);
}
@@ -962,6 +920,13 @@ QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
+{
+ tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
+ bool ok = reloc_jimm20(s->code_ptr - 1, target);
+ tcg_debug_assert(ok);
+}
+
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
TCGReg addrh, TCGMemOpIdx oi,
tcg_insn_unit **label_ptr, bool is_load)
@@ -1007,8 +972,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
/* Compare masked address with the TLB entry. */
label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
- /* NOP to allow patching later */
- tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
/* TLB Hit - translate address using addend. */
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
@@ -1033,7 +996,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
}
@@ -1052,8 +1015,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
/* resolve label address */
- if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH,
- (intptr_t) s->code_ptr, 0)) {
+ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1087,8 +1049,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
/* resolve label address */
- if (!patch_reloc(l->label_ptr[0], R_RISCV_BRANCH,
- (intptr_t) s->code_ptr, 0)) {
+ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
@@ -1274,7 +1235,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
#endif
}
-static tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
@@ -1288,7 +1249,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_exit_tb:
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
- tcg_out_call_int(s, s->code_gen_epilogue, true);
+ tcg_out_call_int(s, tcg_code_gen_epilogue, true);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
tcg_out_call_int(s, tb_ret_addr, true);
@@ -1822,11 +1783,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
/* Return path for goto_ptr. Set return value to 0 */
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
/* TB epilogue */
- tb_ret_addr = s->code_ptr;
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
@@ -1907,7 +1868,7 @@ static const DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index c1bd52bb9a..727c8df418 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -119,6 +119,7 @@ typedef enum {
#define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_brcond2 1
#define TCG_TARGET_HAS_setcond2 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_movcond_i64 0
@@ -160,7 +161,7 @@ typedef enum {
#endif
/* not defined -- call should be eliminated at compile time */
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index c5e096449b..d7ef079055 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -363,36 +363,37 @@ static void * const qemu_st_helpers[16] = {
};
#endif
-static tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities;
-static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *src_rw, int type,
intptr_t value, intptr_t addend)
{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
intptr_t pcrel2;
uint32_t old;
value += addend;
- pcrel2 = (tcg_insn_unit *)value - code_ptr;
+ pcrel2 = (tcg_insn_unit *)value - src_rx;
switch (type) {
case R_390_PC16DBL:
if (pcrel2 == (int16_t)pcrel2) {
- tcg_patch16(code_ptr, pcrel2);
+ tcg_patch16(src_rw, pcrel2);
return true;
}
break;
case R_390_PC32DBL:
if (pcrel2 == (int32_t)pcrel2) {
- tcg_patch32(code_ptr, pcrel2);
+ tcg_patch32(src_rw, pcrel2);
return true;
}
break;
case R_390_20:
if (value == sextract64(value, 0, 20)) {
- old = *(uint32_t *)code_ptr & 0xf00000ff;
+ old = *(uint32_t *)src_rw & 0xf00000ff;
old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
- tcg_patch32(code_ptr, old);
+ tcg_patch32(src_rw, old);
return true;
}
break;
@@ -630,7 +631,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
return;
}
} else if (USE_REG_TB && !in_prologue) {
- ptrdiff_t off = sval - (uintptr_t)s->code_gen_ptr;
+ ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
if (off == sextract64(off, 0, 20)) {
/* This is certain to be an address within TB, and therefore
OFF will be negative; don't try RX_LA. */
@@ -655,7 +656,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
} else if (USE_REG_TB && !in_prologue) {
tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
} else {
TCGReg base = ret ? ret : TCG_TMP0;
tcg_out_insn(s, RIL, LARL, base, 0);
@@ -730,7 +731,8 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
}
/* load data from an absolute host address */
-static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
+static void tcg_out_ld_abs(TCGContext *s, TCGType type,
+ TCGReg dest, const void *abs)
{
intptr_t addr = (intptr_t)abs;
@@ -746,7 +748,7 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
}
}
if (USE_REG_TB) {
- ptrdiff_t disp = abs - (void *)s->code_gen_ptr;
+ ptrdiff_t disp = tcg_tbrel_diff(s, abs);
if (disp == sextract64(disp, 0, 20)) {
tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
return;
@@ -956,7 +958,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
return;
}
} else {
@@ -1015,7 +1017,7 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
} else if (USE_REG_TB) {
tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
} else {
/* Perform the OR via sequential modifications to the high and
low parts. Do this via recursion to handle 16-bit vs 32-bit
@@ -1050,7 +1052,7 @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
} else if (USE_REG_TB) {
tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
} else {
/* Perform the xor by parts. */
tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
@@ -1108,12 +1110,12 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
op = (is_unsigned ? RXY_CLY : RXY_CY);
tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
- 4 - (intptr_t)s->code_gen_ptr);
+ 4 - tcg_tbrel_diff(s, NULL));
} else {
op = (is_unsigned ? RXY_CLG : RXY_CG);
tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
- -(intptr_t)s->code_gen_ptr);
+ tcg_tbrel_diff(s, NULL));
}
goto exit;
} else {
@@ -1302,9 +1304,9 @@ static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}
-static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest)
+static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
{
- ptrdiff_t off = dest - s->code_ptr;
+ ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
if (off == (int16_t)off) {
tcg_out_insn(s, RI, BRC, cc, off);
} else if (off == (int32_t)off) {
@@ -1333,34 +1335,18 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
TCGReg r1, TCGReg r2, TCGLabel *l)
{
- intptr_t off = 0;
-
- if (l->has_value) {
- off = l->u.value_ptr - s->code_ptr;
- tcg_debug_assert(off == (int16_t)off);
- } else {
- tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
- }
-
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
- tcg_out16(s, off);
+ tcg_out16(s, 0);
tcg_out16(s, cc << 12 | (opc & 0xff));
}
static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
TCGReg r1, int i2, TCGLabel *l)
{
- tcg_target_long off = 0;
-
- if (l->has_value) {
- off = l->u.value_ptr - s->code_ptr;
- tcg_debug_assert(off == (int16_t)off);
- } else {
- tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
- }
-
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
- tcg_out16(s, off);
+ tcg_out16(s, 0);
tcg_out16(s, (i2 << 8) | (opc & 0xff));
}
@@ -1415,9 +1401,9 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
tgen_branch(s, cc, l);
}
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
{
- ptrdiff_t off = dest - s->code_ptr;
+ ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
if (off == (int32_t)off) {
tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
} else {
@@ -1601,7 +1587,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
label->oi = oi;
label->datalo_reg = data;
label->addrlo_reg = addr;
- label->raddr = raddr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
@@ -1613,7 +1599,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
- (intptr_t)s->code_ptr, 2)) {
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
@@ -1638,7 +1624,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
MemOp opc = get_memop(oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
- (intptr_t)s->code_ptr, 2)) {
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
@@ -1756,7 +1742,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* Reuse the zeroing that exists for goto_ptr. */
a0 = args[0];
if (a0 == 0) {
- tgen_gotoi(s, S390_CC_ALWAYS, s->code_gen_epilogue);
+ tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
@@ -1766,7 +1752,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_goto_tb:
a0 = args[0];
if (s->tb_jmp_insn_offset) {
- /* branch displacement must be aligned for atomic patching;
+ /*
+ * branch displacement must be aligned for atomic patching;
* see if we need to add extra nop before branch
*/
if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
@@ -1779,7 +1766,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else {
/* load address stored at s->tb_jmp_target_addr + a0 */
tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
- s->tb_jmp_target_addr + a0);
+ tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
/* and go there */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
}
@@ -1789,8 +1776,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCG_REG_TB to the beginning of this TB. */
if (USE_REG_TB) {
int ofs = -tcg_current_code_size(s);
- assert(ofs == (int16_t)ofs);
- tcg_out_insn(s, RI, AGHI, TCG_REG_TB, ofs);
+ /* All TB are restricted to 64KiB by unwind info. */
+ tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
+ tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
+ TCG_REG_TB, TCG_REG_NONE, ofs);
}
break;
@@ -2561,11 +2550,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
* and fall through to the rest of the epilogue.
*/
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
/* TB epilogue */
- tb_ret_addr = s->code_ptr;
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
/* lmg %r6,%r15,fs+48(%r15) (restore registers) */
tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
@@ -2620,7 +2609,7 @@ static const DebugFrame debug_frame = {
}
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index b4feb2f55a..641464eea4 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -97,6 +97,7 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_extrh_i64_i32 0
#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
@@ -145,12 +146,12 @@ enum {
TCG_AREG0 = TCG_REG_R10,
};
-static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
- uintptr_t jmp_addr, uintptr_t addr)
+static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
/* patch the branch destination */
- intptr_t disp = addr - (jmp_addr - 2);
- qatomic_set((int32_t *)jmp_addr, disp / 2);
+ intptr_t disp = addr - (jmp_rx - 2);
+ qatomic_set((int32_t *)jmp_rw, disp / 2);
/* no need to flush icache explicitly */
}
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 6775bd30fc..922ae96481 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -291,14 +291,15 @@ static inline int check_fit_i32(int32_t val, unsigned int bits)
# define check_fit_ptr check_fit_i32
#endif
-static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *src_rw, int type,
intptr_t value, intptr_t addend)
{
- uint32_t insn = *code_ptr;
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ uint32_t insn = *src_rw;
intptr_t pcrel;
value += addend;
- pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
+ pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx);
switch (type) {
case R_SPARC_WDISP16:
@@ -315,7 +316,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
g_assert_not_reached();
}
- *code_ptr = insn;
+ *src_rw = insn;
return true;
}
@@ -440,7 +441,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
/* A 13-bit constant relative to the TB. */
if (!in_prologue && USE_REG_TB) {
- test = arg - (uintptr_t)s->code_gen_ptr;
+ test = tcg_tbrel_diff(s, (void *)arg);
if (check_fit_ptr(test, 13)) {
tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD);
return;
@@ -537,15 +538,15 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
-static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
+static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg)
{
- intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
+ intptr_t diff = tcg_tbrel_diff(s, arg);
if (USE_REG_TB && check_fit_ptr(diff, 13)) {
tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff);
return;
}
- tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
- tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
+ tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff);
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff);
}
static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
@@ -840,7 +841,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
}
-static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest,
+static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest,
bool in_prologue)
{
ptrdiff_t disp = tcg_pcrel_diff(s, dest);
@@ -855,7 +856,7 @@ static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest,
}
}
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
{
tcg_out_call_nodelay(s, dest, false);
tcg_out_nop(s);
@@ -868,8 +869,8 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
}
#ifdef CONFIG_SOFTMMU
-static tcg_insn_unit *qemu_ld_trampoline[16];
-static tcg_insn_unit *qemu_st_trampoline[16];
+static const tcg_insn_unit *qemu_ld_trampoline[16];
+static const tcg_insn_unit *qemu_st_trampoline[16];
static void emit_extend(TCGContext *s, TCGReg r, int op)
{
@@ -930,7 +931,7 @@ static void build_trampolines(TCGContext *s)
while ((uintptr_t)s->code_ptr & 15) {
tcg_out_nop(s);
}
- qemu_ld_trampoline[i] = s->code_ptr;
+ qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
if (SPARC64 || TARGET_LONG_BITS == 32) {
ra = TCG_REG_O3;
@@ -958,7 +959,7 @@ static void build_trampolines(TCGContext *s)
while ((uintptr_t)s->code_ptr & 15) {
tcg_out_nop(s);
}
- qemu_st_trampoline[i] = s->code_ptr;
+ qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
if (SPARC64) {
emit_extend(s, TCG_REG_O2, i);
@@ -1038,7 +1039,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_nop(s);
/* Epilogue for goto_ptr. */
- s->code_gen_epilogue = s->code_ptr;
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
/* delay slot */
tcg_out_movi_imm13(s, TCG_REG_O0, 0);
@@ -1163,7 +1164,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
#ifdef CONFIG_SOFTMMU
unsigned memi = get_mmuidx(oi);
TCGReg addrz, param;
- tcg_insn_unit *func;
+ const tcg_insn_unit *func;
tcg_insn_unit *label_ptr;
addrz = tcg_out_tlb_load(s, addr, memi, memop,
@@ -1245,7 +1246,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
#ifdef CONFIG_SOFTMMU
unsigned memi = get_mmuidx(oi);
TCGReg addrz, param;
- tcg_insn_unit *func;
+ const tcg_insn_unit *func;
tcg_insn_unit *label_ptr;
addrz = tcg_out_tlb_load(s, addr, memi, memop,
@@ -1313,7 +1314,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movi_imm13(s, TCG_REG_O0, a0);
break;
} else if (USE_REG_TB) {
- intptr_t tb_diff = a0 - (uintptr_t)s->code_gen_ptr;
+ intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
if (check_fit_ptr(tb_diff, 13)) {
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
/* Note that TCG_REG_TB has been unwound to O1. */
@@ -1345,8 +1346,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
} else {
/* indirect jump method */
- tcg_out_ld_ptr(s, TCG_REG_TB,
- (uintptr_t)(s->tb_jmp_target_addr + a0));
+ tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0);
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
tcg_out_nop(s);
}
@@ -1816,16 +1816,16 @@ static const DebugFrame debug_frame = {
.fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
};
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
-void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
- uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
intptr_t tb_disp = addr - tc_ptr;
- intptr_t br_disp = addr - jmp_addr;
+ intptr_t br_disp = addr - jmp_rx;
tcg_insn_unit i1, i2;
/* We can reach the entire address space for ILP32.
@@ -1834,9 +1834,9 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
tcg_debug_assert(br_disp == (int32_t)br_disp);
if (!USE_REG_TB) {
- qatomic_set((uint32_t *)jmp_addr,
+ qatomic_set((uint32_t *)jmp_rw,
deposit32(CALL, 0, 30, br_disp >> 2));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
return;
}
@@ -1859,6 +1859,6 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
| INSN_IMM13((tb_disp & 0x3ff) | -0x400));
}
- qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1));
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
}
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index d8b0e32e2e..95ab9af955 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -126,6 +126,7 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 1
#define TCG_TARGET_HAS_extrh_i64_i32 1
@@ -168,7 +169,7 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define TCG_TARGET_NEED_POOL_LABELS
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
index 05f9b3ccd6..c3ce88e69d 100644
--- a/tcg/tcg-ldst.c.inc
+++ b/tcg/tcg-ldst.c.inc
@@ -28,7 +28,7 @@ typedef struct TCGLabelQemuLdst {
TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
- tcg_insn_unit *raddr; /* gen code addr of the next IR of qemu_ld/st IR */
+ const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 4b8a473fad..0374b5d56d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2664,9 +2664,20 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
/* QEMU specific operations. */
-void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
-{
- uintptr_t val = (uintptr_t)tb + idx;
+void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
+{
+ /*
+ * Let the jit code return the read-only version of the
+ * TranslationBlock, so that we minimize the pc-relative
+ * distance of the address of the exit_tb code to TB.
+ * This will improve utilization of pc-relative address loads.
+ *
+ * TODO: Move this to translator_loop, so that all const
+ * TranslationBlock pointers refer to read-only memory.
+ * This requires coordination with targets that do not use
+ * the translator_loop.
+ */
+ uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
if (tb == NULL) {
tcg_debug_assert(idx == 0);
@@ -2883,7 +2894,11 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
}
addr = plugin_prep_mem_callbacks(addr);
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
+ } else {
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+ }
plugin_gen_mem_callbacks(addr, info);
if (swap) {
diff --git a/tcg/tcg-pool.c.inc b/tcg/tcg-pool.c.inc
index 82cbcc89bd..90c2e63b7f 100644
--- a/tcg/tcg-pool.c.inc
+++ b/tcg/tcg-pool.c.inc
@@ -140,6 +140,8 @@ static int tcg_out_pool_finalize(TCGContext *s)
for (; p != NULL; p = p->next) {
size_t size = sizeof(tcg_target_ulong) * p->nlong;
+ uintptr_t value;
+
if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
if (unlikely(a > s->code_gen_highwater)) {
return -1;
@@ -148,7 +150,9 @@ static int tcg_out_pool_finalize(TCGContext *s)
a += size;
l = p;
}
- if (!patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend)) {
+
+ value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
+ if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
return -2;
}
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index ebb9466ffc..472bf1755b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -97,7 +97,7 @@ typedef struct QEMU_PACKED {
DebugFrameFDEHeader fde;
} DebugFrameHeader;
-static void tcg_register_jit_int(void *buf, size_t size,
+static void tcg_register_jit_int(const void *buf, size_t size,
const void *debug_frame,
size_t debug_frame_size)
__attribute__((unused));
@@ -149,7 +149,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
TCGReg base, intptr_t ofs);
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
@@ -161,6 +161,12 @@ static int tcg_out_ldst_finalize(TCGContext *s);
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
+const void *tcg_code_gen_epilogue;
+uintptr_t tcg_splitwx_diff;
+
+#ifndef CONFIG_TCG_INTERPRETER
+tcg_prologue_fn *tcg_qemu_tb_exec;
+#endif
struct tcg_region_tree {
QemuMutex lock;
@@ -296,11 +302,11 @@ static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}
-static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
+static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
tcg_debug_assert(!l->has_value);
l->has_value = 1;
- l->u.value_ptr = ptr;
+ l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
TCGLabel *gen_new_label(void)
@@ -401,8 +407,9 @@ static void tcg_region_trees_init(void)
}
}
-static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
+static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
+ void *p = tcg_splitwx_to_rw(cp);
size_t region_idx;
if (p < region.start_aligned) {
@@ -696,6 +703,7 @@ void tcg_region_init(void)
size_t region_size;
size_t n_regions;
size_t i;
+ uintptr_t splitwx_diff;
n_regions = tcg_n_regions();
@@ -726,6 +734,7 @@ void tcg_region_init(void)
region.end -= page_size;
/* set guard pages */
+ splitwx_diff = tcg_splitwx_diff;
for (i = 0; i < region.n; i++) {
void *start, *end;
int rc;
@@ -733,6 +742,10 @@ void tcg_region_init(void)
tcg_region_bounds(i, &start, &end);
rc = qemu_mprotect_none(end, page_size);
g_assert(!rc);
+ if (splitwx_diff) {
+ rc = qemu_mprotect_none(end + splitwx_diff, page_size);
+ g_assert(!rc);
+ }
}
tcg_region_trees_init();
@@ -747,6 +760,29 @@ void tcg_region_init(void)
#endif
}
+#ifdef CONFIG_DEBUG_TCG
+const void *tcg_splitwx_to_rx(void *rw)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rw) {
+ g_assert(in_code_gen_buffer(rw));
+ rw += tcg_splitwx_diff;
+ }
+ return rw;
+}
+
+void *tcg_splitwx_to_rw(const void *rx)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rx) {
+ rx -= tcg_splitwx_diff;
+ /* Assert that we end with a pointer in the rw region. */
+ g_assert(in_code_gen_buffer(rx));
+ }
+ return (void *)rx;
+}
+#endif /* CONFIG_DEBUG_TCG */
+
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
@@ -1055,7 +1091,17 @@ void tcg_prologue_init(TCGContext *s)
s->code_ptr = buf0;
s->code_buf = buf0;
s->data_gen_ptr = NULL;
- s->code_gen_prologue = buf0;
+
+ /*
+ * The region trees are not yet configured, but tcg_splitwx_to_rx
+ * needs the bounds for an assert.
+ */
+ region.start = buf0;
+ region.end = buf0 + total_size;
+
+#ifndef CONFIG_TCG_INTERPRETER
+ tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
+#endif
/* Compute a high-water mark, at which we voluntarily flush the buffer
and start over. The size here is arbitrary, significantly larger
@@ -1078,7 +1124,10 @@ void tcg_prologue_init(TCGContext *s)
#endif
buf1 = s->code_ptr;
- flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+#ifndef CONFIG_TCG_INTERPRETER
+ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
+ tcg_ptr_byte_diff(buf1, buf0));
+#endif
/* Deduct the prologue from the buffer. */
prologue_size = tcg_current_code_size(s);
@@ -1088,7 +1137,7 @@ void tcg_prologue_init(TCGContext *s)
total_size -= prologue_size;
s->code_gen_buffer_size = total_size;
- tcg_register_jit(s->code_gen_buffer, total_size);
+ tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
@@ -1123,7 +1172,7 @@ void tcg_prologue_init(TCGContext *s)
/* Assert that goto_ptr is implemented completely. */
if (TCG_TARGET_HAS_goto_ptr) {
- tcg_debug_assert(s->code_gen_epilogue != NULL);
+ tcg_debug_assert(tcg_code_gen_epilogue != NULL);
}
}
@@ -1427,6 +1476,9 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_qemu_st_i64:
return true;
+ case INDEX_op_qemu_st8_i32:
+ return TCG_TARGET_HAS_qemu_st8_i32;
+
case INDEX_op_goto_ptr:
return TCG_TARGET_HAS_goto_ptr;
@@ -2087,6 +2139,7 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
break;
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
{
@@ -4216,8 +4269,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_reg_alloc_start(s);
- s->code_buf = tb->tc.ptr;
- s->code_ptr = tb->tc.ptr;
+ /*
+ * Reset the buffer pointers when restarting after overflow.
+ * TODO: Move this into translate-all.c with the rest of the
+ * buffer management. Having only this done here is confusing.
+ */
+ s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
+ s->code_ptr = s->code_buf;
#ifdef TCG_TARGET_NEED_LDST_LABELS
QSIMPLEQ_INIT(&s->ldst_labels);
@@ -4271,7 +4329,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
break;
case INDEX_op_set_label:
tcg_reg_alloc_bb_end(s, s->reserved_regs);
- tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
+ tcg_out_label(s, arg_label(op->args[0]));
break;
case INDEX_op_call:
tcg_reg_alloc_call(s, op);
@@ -4320,8 +4378,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
return -2;
}
+#ifndef CONFIG_TCG_INTERPRETER
/* flush instruction cache */
- flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
+ (uintptr_t)s->code_buf,
+ tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
+#endif
return tcg_current_code_size(s);
}
@@ -4449,7 +4511,7 @@ static int find_string(const char *strtab, const char *str)
}
}
-static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
+static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
const void *debug_frame,
size_t debug_frame_size)
{
@@ -4651,13 +4713,13 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
/* No support for the feature. Provide the entry point expected by exec.c,
and implement the internal function we declared earlier. */
-static void tcg_register_jit_int(void *buf, size_t size,
+static void tcg_register_jit_int(const void *buf, size_t size,
const void *debug_frame,
size_t debug_frame_size)
{
}
-void tcg_register_jit(void *buf, size_t buf_size)
+void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
diff --git a/tcg/tci.c b/tcg/tci.c
index 5d97b7c71c..2bcc4409be 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -164,34 +164,34 @@ static uint64_t tci_uint64(uint32_t high, uint32_t low)
#endif
/* Read constant (native size) from bytecode. */
-static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
+static tcg_target_ulong tci_read_i(const uint8_t **tb_ptr)
{
- tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr);
+ tcg_target_ulong value = *(const tcg_target_ulong *)(*tb_ptr);
*tb_ptr += sizeof(value);
return value;
}
/* Read unsigned constant (32 bit) from bytecode. */
-static uint32_t tci_read_i32(uint8_t **tb_ptr)
+static uint32_t tci_read_i32(const uint8_t **tb_ptr)
{
- uint32_t value = *(uint32_t *)(*tb_ptr);
+ uint32_t value = *(const uint32_t *)(*tb_ptr);
*tb_ptr += sizeof(value);
return value;
}
/* Read signed constant (32 bit) from bytecode. */
-static int32_t tci_read_s32(uint8_t **tb_ptr)
+static int32_t tci_read_s32(const uint8_t **tb_ptr)
{
- int32_t value = *(int32_t *)(*tb_ptr);
+ int32_t value = *(const int32_t *)(*tb_ptr);
*tb_ptr += sizeof(value);
return value;
}
#if TCG_TARGET_REG_BITS == 64
/* Read constant (64 bit) from bytecode. */
-static uint64_t tci_read_i64(uint8_t **tb_ptr)
+static uint64_t tci_read_i64(const uint8_t **tb_ptr)
{
- uint64_t value = *(uint64_t *)(*tb_ptr);
+ uint64_t value = *(const uint64_t *)(*tb_ptr);
*tb_ptr += sizeof(value);
return value;
}
@@ -199,7 +199,7 @@ static uint64_t tci_read_i64(uint8_t **tb_ptr)
/* Read indexed register (native size) from bytecode. */
static tcg_target_ulong
-tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+tci_read_r(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
tcg_target_ulong value = tci_read_reg(regs, **tb_ptr);
*tb_ptr += 1;
@@ -207,7 +207,7 @@ tci_read_r(const tcg_target_ulong *regs, uint8_t **tb_ptr)
}
/* Read indexed register (8 bit) from bytecode. */
-static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
uint8_t value = tci_read_reg8(regs, **tb_ptr);
*tb_ptr += 1;
@@ -216,7 +216,7 @@ static uint8_t tci_read_r8(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
/* Read indexed register (8 bit signed) from bytecode. */
-static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
int8_t value = tci_read_reg8s(regs, **tb_ptr);
*tb_ptr += 1;
@@ -225,7 +225,8 @@ static int8_t tci_read_r8s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#endif
/* Read indexed register (16 bit) from bytecode. */
-static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint16_t tci_read_r16(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16(regs, **tb_ptr);
*tb_ptr += 1;
@@ -234,7 +235,8 @@ static uint16_t tci_read_r16(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
/* Read indexed register (16 bit signed) from bytecode. */
-static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static int16_t tci_read_r16s(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
int16_t value = tci_read_reg16s(regs, **tb_ptr);
*tb_ptr += 1;
@@ -243,7 +245,8 @@ static int16_t tci_read_r16s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#endif
/* Read indexed register (32 bit) from bytecode. */
-static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint32_t tci_read_r32(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint32_t value = tci_read_reg32(regs, **tb_ptr);
*tb_ptr += 1;
@@ -252,14 +255,16 @@ static uint32_t tci_read_r32(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#if TCG_TARGET_REG_BITS == 32
/* Read two indexed registers (2 * 32 bit) from bytecode. */
-static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint64_t tci_read_r64(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint32_t low = tci_read_r32(regs, tb_ptr);
return tci_uint64(tci_read_r32(regs, tb_ptr), low);
}
#elif TCG_TARGET_REG_BITS == 64
/* Read indexed register (32 bit signed) from bytecode. */
-static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static int32_t tci_read_r32s(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
int32_t value = tci_read_reg32s(regs, **tb_ptr);
*tb_ptr += 1;
@@ -267,7 +272,8 @@ static int32_t tci_read_r32s(const tcg_target_ulong *regs, uint8_t **tb_ptr)
}
/* Read indexed register (64 bit) from bytecode. */
-static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint64_t tci_read_r64(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint64_t value = tci_read_reg64(regs, **tb_ptr);
*tb_ptr += 1;
@@ -277,7 +283,7 @@ static uint64_t tci_read_r64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
/* Read indexed register(s) with target address from bytecode. */
static target_ulong
-tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+tci_read_ulong(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
target_ulong taddr = tci_read_r(regs, tb_ptr);
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -288,7 +294,7 @@ tci_read_ulong(const tcg_target_ulong *regs, uint8_t **tb_ptr)
/* Read indexed register or constant (native size) from bytecode. */
static tcg_target_ulong
-tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+tci_read_ri(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
{
tcg_target_ulong value;
TCGReg r = **tb_ptr;
@@ -302,7 +308,8 @@ tci_read_ri(const tcg_target_ulong *regs, uint8_t **tb_ptr)
}
/* Read indexed register or constant (32 bit) from bytecode. */
-static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint32_t tci_read_ri32(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint32_t value;
TCGReg r = **tb_ptr;
@@ -317,14 +324,16 @@ static uint32_t tci_read_ri32(const tcg_target_ulong *regs, uint8_t **tb_ptr)
#if TCG_TARGET_REG_BITS == 32
/* Read two indexed registers or constants (2 * 32 bit) from bytecode. */
-static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint64_t tci_read_ri64(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint32_t low = tci_read_ri32(regs, tb_ptr);
return tci_uint64(tci_read_ri32(regs, tb_ptr), low);
}
#elif TCG_TARGET_REG_BITS == 64
/* Read indexed register or constant (64 bit) from bytecode. */
-static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
+static uint64_t tci_read_ri64(const tcg_target_ulong *regs,
+ const uint8_t **tb_ptr)
{
uint64_t value;
TCGReg r = **tb_ptr;
@@ -338,7 +347,7 @@ static uint64_t tci_read_ri64(const tcg_target_ulong *regs, uint8_t **tb_ptr)
}
#endif
-static tcg_target_ulong tci_read_label(uint8_t **tb_ptr)
+static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
{
tcg_target_ulong label = tci_read_i(tb_ptr);
tci_assert(label != 0);
@@ -481,9 +490,10 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
* One possible operation in the pseudo code is a call to binary code.
* Therefore, disable CFI checks in the interpreter function
*/
-QEMU_DISABLE_CFI
-uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
+uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
+ const void *v_tb_ptr)
{
+ const uint8_t *tb_ptr = v_tb_ptr;
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
long tcg_temps[CPU_TEMP_BUF_NLONGS];
uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
@@ -497,7 +507,7 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
TCGOpcode opc = tb_ptr[0];
#if defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
uint8_t op_size = tb_ptr[1];
- uint8_t *old_code_ptr = tb_ptr;
+ const uint8_t *old_code_ptr = tb_ptr;
#endif
tcg_target_ulong t0;
tcg_target_ulong t1;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 231b9b1775..d5a4d9d37c 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -545,7 +545,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}
-static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_out_op_t(s, INDEX_op_call);
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index b84480f989..bb784e018e 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -88,6 +88,7 @@
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 0
#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_extrl_i64_i32 0
@@ -198,11 +199,11 @@ void tci_disas(uint8_t opc);
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
-static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
- uintptr_t jmp_addr, uintptr_t addr)
+static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
{
/* patch the branch destination */
- qatomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
+ qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
/* no need to flush icache explicitly */
}
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 2881832a38..6a20723902 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -7,12 +7,81 @@
#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
+#include "qemu/bitops.h"
#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
/* Caches are coherent and do not require flushing; symbol inline. */
+#elif defined(__aarch64__)
+
+#ifdef CONFIG_DARWIN
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+extern void sys_icache_invalidate(void *start, size_t len);
+extern void sys_dcache_flush(void *start, size_t len);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+ sys_dcache_flush((void *)rw, len);
+ sys_icache_invalidate((void *)rx, len);
+}
+#else
+
+/*
+ * TODO: unify this with cacheinfo.c.
+ * We want to save the whole contents of CTR_EL0, so that we
+ * have more than the linesize, but also IDC and DIC.
+ */
+static unsigned int save_ctr_el0;
+static void __attribute__((constructor)) init_ctr_el0(void)
+{
+ asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0));
+}
+
+/*
+ * This is a copy of gcc's __aarch64_sync_cache_range, modified
+ * to fit this three-operand interface.
+ */
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+ const unsigned CTR_IDC = 1u << 28;
+ const unsigned CTR_DIC = 1u << 29;
+ const unsigned int ctr_el0 = save_ctr_el0;
+ const uintptr_t icache_lsize = 4 << extract32(ctr_el0, 0, 4);
+ const uintptr_t dcache_lsize = 4 << extract32(ctr_el0, 16, 4);
+ uintptr_t p;
+
+ /*
+ * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification
+ * is not required for instruction to data coherence.
+ */
+ if (!(ctr_el0 & CTR_IDC)) {
+ /*
+ * Loop over the address range, clearing one cache line at once.
+ * Data cache must be flushed to unification first to make sure
+ * the instruction cache fetches the updated data.
+ */
+ for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
+ asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
+ }
+ asm volatile("dsb\tish" : : : "memory");
+ }
+
+ /*
+ * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point
+ * of Unification is not required for instruction to data coherence.
+ */
+ if (!(ctr_el0 & CTR_DIC)) {
+ for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
+ asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
+ }
+ asm volatile ("dsb\tish" : : : "memory");
+ }
+
+ asm volatile("isb" : : : "memory");
+}
+#endif /* CONFIG_DARWIN */
+
#elif defined(__mips__)
#ifdef __OpenBSD__
@@ -21,29 +90,32 @@
#include <sys/cachectl.h>
#endif
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- cacheflush((void *)start, stop - start, ICACHE);
+ if (rx != rw) {
+ cacheflush((void *)rw, len, DCACHE);
+ }
+ cacheflush((void *)rx, len, ICACHE);
}
#elif defined(__powerpc__)
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- uintptr_t p, start1, stop1;
+ uintptr_t p, b, e;
size_t dsize = qemu_dcache_linesize;
size_t isize = qemu_icache_linesize;
- start1 = start & ~(dsize - 1);
- stop1 = (stop + dsize - 1) & ~(dsize - 1);
- for (p = start1; p < stop1; p += dsize) {
+ b = rw & ~(dsize - 1);
+ e = (rw + len + dsize - 1) & ~(dsize - 1);
+ for (p = b; p < e; p += dsize) {
asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
}
asm volatile ("sync" : : : "memory");
- start &= start & ~(isize - 1);
- stop1 = (stop + isize - 1) & ~(isize - 1);
- for (p = start1; p < stop1; p += isize) {
+ b = rx & ~(isize - 1);
+ e = (rx + len + isize - 1) & ~(isize - 1);
+ for (p = b; p < e; p += isize) {
asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
}
asm volatile ("sync" : : : "memory");
@@ -52,20 +124,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
#elif defined(__sparc__)
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- uintptr_t p;
-
- for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+ /* No additional data flush to the RW virtual address required. */
+ uintptr_t p, end = (rx + len + 7) & -8;
+ for (p = rx & -8; p < end; p += 8) {
__asm__ __volatile__("flush\t%0" : : "r" (p));
}
}
#else
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- __builtin___clear_cache((char *)start, (char *)stop);
+ if (rw != rx) {
+ __builtin___clear_cache((char *)rw, (char *)rw + len);
+ }
+ __builtin___clear_cache((char *)rx, (char *)rx + len);
}
#endif
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index 7804c186b6..b182f0b693 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize)
*isize = *dsize;
} else {
#if defined(_ARCH_PPC)
- /* For PPC, we're going to use the icache size computed for
- flush_icache_range. Which means that we must use the
- architecture minimum. */
+ /*
+ * For PPC, we're going to use the cache sizes computed for
+ * flush_idcache_range. Which means that we must use the
+ * architecture minimum.
+ */
*isize = *dsize = 16;
#else
/* Otherwise, 64 bytes is not uncommon. */
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index f1e2801b11..359c52df12 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -201,6 +201,8 @@ void *qemu_try_memalign(size_t alignment, size_t size)
if (alignment < sizeof(void*)) {
alignment = sizeof(void*);
+ } else {
+ g_assert(is_power_of_2(alignment));
}
#if defined(CONFIG_POSIX_MEMALIGN)
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 01787df74c..e6f83e10ed 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -39,6 +39,7 @@
#include "trace.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
+#include <malloc.h>
/* this must come after including "trace.h" */
#include <shlobj.h>
@@ -56,10 +57,9 @@ void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
- if (!size) {
- abort();
- }
- ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ g_assert(size != 0);
+ g_assert(is_power_of_2(alignment));
+ ptr = _aligned_malloc(alignment, size);
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
@@ -93,9 +93,7 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
void qemu_vfree(void *ptr)
{
trace_qemu_vfree(ptr);
- if (ptr) {
- VirtualFree(ptr, 0, MEM_RELEASE);
- }
+ _aligned_free(ptr);
}
void qemu_anon_ram_free(void *ptr, size_t size)