aboutsummaryrefslogtreecommitdiff
path: root/tcg/tcg.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/tcg.c')
-rw-r--r--tcg/tcg.c631
1 files changed, 507 insertions, 124 deletions
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 1231c8ab4c..0b0fe9c7ad 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -63,6 +63,9 @@
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
+#ifdef CONFIG_USER_ONLY
+#include "exec/user/guest-base.h"
+#endif
/* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
@@ -197,6 +200,38 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
const TCGLdstHelperParam *p)
__attribute__((unused));
+static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
+ [MO_UB] = helper_ldub_mmu, /* load helper per MO_* index; unlisted slots stay NULL */
+ [MO_SB] = helper_ldsb_mmu,
+ [MO_UW] = helper_lduw_mmu,
+ [MO_SW] = helper_ldsw_mmu,
+ [MO_UL] = helper_ldul_mmu,
+ [MO_UQ] = helper_ldq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_SL] = helper_ldsl_mmu, /* entry exists only when host registers are 64 bits */
+ [MO_128] = helper_ld16_mmu, /* entry exists only when host registers are 64 bits */
+#endif /* TCG_TARGET_REG_BITS == 64 */
+};
+
+static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
+ [MO_8] = helper_stb_mmu, /* store helper per MO_* size index; unlisted slots stay NULL */
+ [MO_16] = helper_stw_mmu,
+ [MO_32] = helper_stl_mmu,
+ [MO_64] = helper_stq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_128] = helper_st16_mmu, /* entry exists only when host registers are 64 bits */
+#endif /* TCG_TARGET_REG_BITS == 64 */
+};
+
+typedef struct {
+ MemOp atom; /* lg2 bits of atomicity required */
+ MemOp align; /* lg2 bits of alignment to use */
+} TCGAtomAlign;
+
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
+ MemOp host_atom, bool allow_two_ops)
+ __attribute__((unused));
+
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
@@ -513,6 +548,82 @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
tcg_out_movext1_new_src(s, i1, src1);
}
+/**
+ * tcg_out_movext3 -- move and extend three pairs
+ * @s: tcg context
+ * @i1: first move description
+ * @i2: second move description
+ * @i3: third move description
+ * @scratch: temporary register, or -1 for none
+ *
+ * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
+ * between the sources and destinations.  A cycle through all three is
+ * broken with two xchg if available, else by saving one source in @scratch.
+ */
+static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
+                            const TCGMovExtend *i2, const TCGMovExtend *i3,
+                            int scratch)
+{
+    TCGReg src1 = i1->src;
+    TCGReg src2 = i2->src;
+    TCGReg src3 = i3->src;
+
+    if (i1->dst != src2 && i1->dst != src3) {
+        tcg_out_movext1(s, i1);
+        tcg_out_movext2(s, i2, i3, scratch);
+        return;
+    }
+    if (i2->dst != src1 && i2->dst != src3) {
+        tcg_out_movext1(s, i2);
+        tcg_out_movext2(s, i1, i3, scratch);
+        return;
+    }
+    if (i3->dst != src1 && i3->dst != src2) {
+        tcg_out_movext1(s, i3);
+        tcg_out_movext2(s, i1, i2, scratch);
+        return;
+    }
+
+    /*
+     * There is a cycle.  Since there are only 3 nodes, the cycle is
+     * either "clockwise" or "anti-clockwise", and can be solved with
+     * a single scratch or two xchg.
+     */
+    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
+        /* "Clockwise": value 1 -> src2, value 2 -> src3, value 3 -> src1. */
+        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
+            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src1, src3);
+            /* src1 held value 2 after the first swap; now extend in place. */
+            tcg_out_movext1_new_src(s, i1, i1->dst);
+            tcg_out_movext1_new_src(s, i2, i2->dst);
+            tcg_out_movext1_new_src(s, i3, i3->dst);
+        } else {
+            tcg_debug_assert(scratch >= 0);
+            tcg_out_mov(s, i1->src_type, scratch, src1);
+            tcg_out_movext1(s, i3);
+            tcg_out_movext1(s, i2);
+            tcg_out_movext1_new_src(s, i1, scratch);
+        }
+    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
+        /* "Anti-clockwise": value 1 -> src3, value 2 -> src1, value 3 -> src2. */
+        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
+            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src3);
+            /* src3 held value 2 after the first swap; now extend in place. */
+            tcg_out_movext1_new_src(s, i1, i1->dst);
+            tcg_out_movext1_new_src(s, i2, i2->dst);
+            tcg_out_movext1_new_src(s, i3, i3->dst);
+        } else {
+            tcg_debug_assert(scratch >= 0);
+            tcg_out_mov(s, i1->src_type, scratch, src1);
+            tcg_out_movext1(s, i2);
+            tcg_out_movext1(s, i3);
+            tcg_out_movext1_new_src(s, i1, scratch);
+        }
+    } else {
+        g_assert_not_reached();
+    }
+}
+
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
@@ -757,7 +868,7 @@ static TCGHelperInfo info_helper_ld32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
| dh_typemask(env, 1)
- | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
@@ -766,7 +877,16 @@ static TCGHelperInfo info_helper_ld64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(i64, 0) /* return uint64_t */
| dh_typemask(env, 1)
- | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i64, 2) /* uint64_t addr */
+ | dh_typemask(i32, 3) /* unsigned oi */
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_ld128_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(i128, 0) /* return Int128 */
+ | dh_typemask(env, 1)
+ | dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
@@ -775,7 +895,7 @@ static TCGHelperInfo info_helper_st32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
- | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i32, 3) /* uint32_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
@@ -785,12 +905,22 @@ static TCGHelperInfo info_helper_st64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
- | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i64, 2) /* uint64_t addr */
| dh_typemask(i64, 3) /* uint64_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
+static TCGHelperInfo info_helper_st128_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(void, 0) /* no return value */
+ | dh_typemask(env, 1) /* env argument */
+ | dh_typemask(i64, 2) /* uint64_t addr */
+ | dh_typemask(i128, 3) /* Int128 data */
+ | dh_typemask(i32, 4) /* unsigned oi */
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
+};
+
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
@@ -1204,8 +1334,10 @@ static void tcg_context_init(unsigned max_cpus)
init_call_layout(&info_helper_ld32_mmu);
init_call_layout(&info_helper_ld64_mmu);
+ init_call_layout(&info_helper_ld128_mmu);
init_call_layout(&info_helper_st32_mmu);
init_call_layout(&info_helper_st64_mmu);
+ init_call_layout(&info_helper_st128_mmu);
#ifdef CONFIG_TCG_INTERPRETER
init_ffi_layouts();
@@ -1391,6 +1523,9 @@ void tcg_func_start(TCGContext *s)
QTAILQ_INIT(&s->ops);
QTAILQ_INIT(&s->free_ops);
QSIMPLEQ_INIT(&s->labels);
+
+ tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
+ s->addr_type == TCG_TYPE_I64);
}
static TCGTemp *tcg_temp_alloc(TCGContext *s)
@@ -1707,15 +1842,26 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_exit_tb:
case INDEX_op_goto_tb:
case INDEX_op_goto_ptr:
- case INDEX_op_qemu_ld_i32:
- case INDEX_op_qemu_st_i32:
- case INDEX_op_qemu_ld_i64:
- case INDEX_op_qemu_st_i64:
+ case INDEX_op_qemu_ld_a32_i32:
+ case INDEX_op_qemu_ld_a64_i32:
+ case INDEX_op_qemu_st_a32_i32:
+ case INDEX_op_qemu_st_a64_i32:
+ case INDEX_op_qemu_ld_a32_i64:
+ case INDEX_op_qemu_ld_a64_i64:
+ case INDEX_op_qemu_st_a32_i64:
+ case INDEX_op_qemu_st_a64_i64:
return true;
- case INDEX_op_qemu_st8_i32:
+ case INDEX_op_qemu_st8_a32_i32:
+ case INDEX_op_qemu_st8_a64_i32:
return TCG_TARGET_HAS_qemu_st8_i32;
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
+ return TCG_TARGET_HAS_qemu_ldst_i128;
+
case INDEX_op_mov_i32:
case INDEX_op_setcond_i32:
case INDEX_op_brcond_i32:
@@ -2168,7 +2314,7 @@ static const char * const cond_name[] =
[TCG_COND_GTU] = "gtu"
};
-static const char * const ldst_name[] =
+static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
[MO_UB] = "ub",
[MO_SB] = "sb",
@@ -2182,6 +2328,8 @@ static const char * const ldst_name[] =
[MO_BEUL] = "beul",
[MO_BESL] = "besl",
[MO_BEUQ] = "beq",
+ [MO_128 + MO_BE] = "beo",
+ [MO_128 + MO_LE] = "leo",
};
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
@@ -2195,6 +2343,15 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
[MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
+static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
+ [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", /* empty string: adds nothing when printed */
+ [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
+ [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
+ [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
+ [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
+ [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
+}; /* indices without an initializer above are left as NULL */
+
static const char bswap_flag_name[][6] = {
[TCG_BSWAP_IZ] = "iz",
[TCG_BSWAP_OZ] = "oz",
@@ -2240,13 +2397,8 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
col += ne_fprintf(f, "\n ----");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
- target_ulong a;
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
-#else
- a = op->args[i];
-#endif
- col += ne_fprintf(f, " " TARGET_FMT_lx, a);
+ col += ne_fprintf(f, " %016" PRIx64,
+ tcg_get_insn_start_param(op, i));
}
} else if (c == INDEX_op_call) {
const TCGHelperInfo *info = tcg_call_info(op);
@@ -2324,23 +2476,38 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
}
i = 1;
break;
- case INDEX_op_qemu_ld_i32:
- case INDEX_op_qemu_st_i32:
- case INDEX_op_qemu_st8_i32:
- case INDEX_op_qemu_ld_i64:
- case INDEX_op_qemu_st_i64:
+ case INDEX_op_qemu_ld_a32_i32:
+ case INDEX_op_qemu_ld_a64_i32:
+ case INDEX_op_qemu_st_a32_i32:
+ case INDEX_op_qemu_st_a64_i32:
+ case INDEX_op_qemu_st8_a32_i32:
+ case INDEX_op_qemu_st8_a64_i32:
+ case INDEX_op_qemu_ld_a32_i64:
+ case INDEX_op_qemu_ld_a64_i64:
+ case INDEX_op_qemu_st_a32_i64:
+ case INDEX_op_qemu_st_a64_i64:
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
{
+ const char *s_al, *s_op, *s_at;
MemOpIdx oi = op->args[k++];
MemOp op = get_memop(oi);
unsigned ix = get_mmuidx(oi);
- if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
- col += ne_fprintf(f, ",$0x%x,%u", op, ix);
+ s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
+ s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+ s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
+ op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+
+ /* If all fields are accounted for, print symbolically. */
+ if (!op && s_al && s_op && s_at) {
+ col += ne_fprintf(f, ",%s%s%s,%u",
+ s_at, s_al, s_op, ix);
} else {
- const char *s_al, *s_op;
- s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
- s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
- col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
+ op = get_memop(oi);
+ col += ne_fprintf(f, ",$0x%x,%u", op, ix);
}
i = 1;
}
@@ -5087,6 +5254,92 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
}
}
+/**
+ * atom_and_align_for_opc:
+ * @s: tcg context
+ * @opc: memory operation code
+ * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
+ * @allow_two_ops: true if we are prepared to issue two operations
+ *
+ * Compute the alignment and atomicity that the inline fast path
+ * should use for @opc.  The result's alignment may exceed the one
+ * requested by @opc; in that case the slow path helper is relied
+ * upon to diagnose the correct alignment.
+ *
+ * If @allow_two_ops, the host is prepared to test for 2x alignment,
+ * and issue two loads or stores for subalignment.
+ *
+ * Returns a TCGAtomAlign: .atom is the lg2 size that must be atomic,
+ * .align is the lg2 alignment to use.
+ */
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
+                                           MemOp host_atom, bool allow_two_ops)
+{
+    const MemOp size = opc & MO_SIZE;
+    const MemOp half = size ? size - 1 : 0;
+
+    /* Start from the values shared by most cases; refine them below. */
+    TCGAtomAlign aa = {
+        .atom = size,
+        .align = get_alignment_bits(opc),
+    };
+
+    /* When serialized, no further atomicity required. */
+    const MemOp atom = ((s->gen_tb->cflags & CF_PARALLEL)
+                        ? opc & MO_ATOM_MASK
+                        : MO_ATOM_NONE);
+
+    switch (atom) {
+    case MO_ATOM_IFALIGN:
+        /* Atomic up to the full size, with no extra alignment. */
+        break;
+
+    case MO_ATOM_IFALIGN_PAIR:
+        aa.atom = half;
+        break;
+
+    case MO_ATOM_NONE:
+        /* The operation requires no specific atomicity. */
+        aa.atom = MO_8;
+        break;
+
+    case MO_ATOM_WITHIN16:
+        /*
+         * For MO_128, misalignment implies !within16, and therefore
+         * no atomicity.  Otherwise, a host that does not implement
+         * within16 must require alignment.
+         */
+        if (size != MO_128 && host_atom != MO_ATOM_WITHIN16) {
+            aa.align = MAX(aa.align, size);
+        }
+        break;
+
+    case MO_ATOM_WITHIN16_PAIR:
+        /*
+         * Misalignment implies !within16, and therefore half atomicity.
+         * Any host prepared for two operations can implement this with
+         * half alignment.
+         */
+        if (allow_two_ops && host_atom != MO_ATOM_WITHIN16) {
+            aa.align = MAX(aa.align, half);
+        }
+        break;
+
+    case MO_ATOM_SUBALIGN:
+        if (host_atom == MO_ATOM_SUBALIGN) {
+            break;
+        }
+        /* If unaligned but not odd, there are subobjects up to half. */
+        aa.align = MAX(aa.align, allow_two_ops ? half : size);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    return aa;
+}
+
/*
* Similarly for qemu_ld/st slow path helpers.
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
@@ -5109,57 +5362,12 @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
return ofs;
}
-static void tcg_out_helper_load_regs(TCGContext *s,
- unsigned nmov, TCGMovExtend *mov,
- unsigned ntmp, const int *tmp)
-{
- switch (nmov) {
- default:
- /* The backend must have provided enough temps for the worst case. */
- tcg_debug_assert(ntmp + 1 >= nmov);
-
- for (unsigned i = nmov - 1; i >= 2; --i) {
- TCGReg dst = mov[i].dst;
-
- for (unsigned j = 0; j < i; ++j) {
- if (dst == mov[j].src) {
- /*
- * Conflict.
- * Copy the source to a temporary, recurse for the
- * remaining moves, perform the extension from our
- * scratch on the way out.
- */
- TCGReg scratch = tmp[--ntmp];
- tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
- mov[i].src = scratch;
-
- tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
- tcg_out_movext1(s, &mov[i]);
- return;
- }
- }
-
- /* No conflicts: perform this move and continue. */
- tcg_out_movext1(s, &mov[i]);
- }
- /* fall through for the final two moves */
-
- case 2:
- tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
- return;
- case 1:
- tcg_out_movext1(s, mov);
- return;
- case 0:
- g_assert_not_reached();
- }
-}
-
static void tcg_out_helper_load_slots(TCGContext *s,
unsigned nmov, TCGMovExtend *mov,
const TCGLdstHelperParam *parm)
{
unsigned i;
+ TCGReg dst3;
/*
* Start from the end, storing to the stack first.
@@ -5197,7 +5405,47 @@ static void tcg_out_helper_load_slots(TCGContext *s,
for (i = 0; i < nmov; ++i) {
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
}
- tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
+
+    switch (nmov) {
+    case 4:
+        /* The backend must have provided enough temps for the worst case. */
+        tcg_debug_assert(parm->ntmp >= 2);
+
+        dst3 = mov[3].dst;
+        for (unsigned j = 0; j < 3; ++j) {
+            if (dst3 == mov[j].src) {
+                /*
+                 * Conflict.  Copy the source to a temporary, perform the
+                 * remaining moves, then the extension from our scratch.
+                 * All four moves are then done: return rather than break,
+                 * which would only leave the loop and repeat them below.
+                 */
+                TCGReg scratch = parm->tmp[1];
+                tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
+                tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
+                tcg_out_movext1_new_src(s, &mov[3], scratch);
+                return;
+            }
+        }
+
+        /* No conflicts: perform this move and continue. */
+        tcg_out_movext1(s, &mov[3]);
+        /* fall through */
+
+    case 3:
+        tcg_out_movext3(s, mov, mov + 1, mov + 2,
+                        parm->ntmp ? parm->tmp[0] : -1);
+        break;
+    case 2:
+        tcg_out_movext2(s, mov, mov + 1,
+                        parm->ntmp ? parm->tmp[0] : -1);
+        break;
+    case 1:
+        tcg_out_movext1(s, mov);
+        break;
+    default:
+        g_assert_not_reached();
+    }
}
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
@@ -5288,6 +5536,8 @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
TCGType dst_type, TCGType src_type,
TCGReg lo, TCGReg hi)
{
+ MemOp reg_mo;
+
if (dst_type <= TCG_TYPE_REG) {
MemOp src_ext;
@@ -5315,19 +5565,25 @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
return 1;
}
- assert(TCG_TARGET_REG_BITS == 32);
+ if (TCG_TARGET_REG_BITS == 32) {
+ assert(dst_type == TCG_TYPE_I64);
+ reg_mo = MO_32;
+ } else {
+ assert(dst_type == TCG_TYPE_I128);
+ reg_mo = MO_64;
+ }
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
mov[0].src = lo;
- mov[0].dst_type = TCG_TYPE_I32;
- mov[0].src_type = TCG_TYPE_I32;
- mov[0].src_ext = MO_32;
+ mov[0].dst_type = TCG_TYPE_REG;
+ mov[0].src_type = TCG_TYPE_REG;
+ mov[0].src_ext = reg_mo;
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
mov[1].src = hi;
- mov[1].dst_type = TCG_TYPE_I32;
- mov[1].src_type = TCG_TYPE_I32;
- mov[1].src_ext = MO_32;
+ mov[1].dst_type = TCG_TYPE_REG;
+ mov[1].src_type = TCG_TYPE_REG;
+ mov[1].src_ext = reg_mo;
return 2;
}
@@ -5350,6 +5606,9 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
case MO_64:
info = &info_helper_ld64_mmu;
break;
+ case MO_128:
+ info = &info_helper_ld128_mmu;
+ break;
default:
g_assert_not_reached();
}
@@ -5358,14 +5617,54 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
next_arg = 1;
loc = &info->in[next_arg];
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
- ldst->addrlo_reg, ldst->addrhi_reg);
- next_arg += nmov;
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
+ /*
+ * 32-bit host with 32-bit guest: zero-extend the guest address
+ * to 64-bits for the helper by storing the low part, then
+ * load a zero for the high part.
+ */
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
+ TCG_TYPE_I32, TCG_TYPE_I32,
+ ldst->addrlo_reg, -1);
+ tcg_out_helper_load_slots(s, 1, mov, parm);
+
+ tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
+ TCG_TYPE_I32, 0, parm);
+ next_arg += 2;
+ } else {
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
+ ldst->addrlo_reg, ldst->addrhi_reg);
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
+ next_arg += nmov;
+ }
- tcg_out_helper_load_slots(s, nmov, mov, parm);
+ switch (info->out_kind) {
+ case TCG_CALL_RET_NORMAL:
+ case TCG_CALL_RET_BY_VEC:
+ break;
+ case TCG_CALL_RET_BY_REF:
+ /*
+ * The return reference is in the first argument slot.
+ * We need memory in which to return: re-use the top of stack.
+ */
+ {
+ int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
- /* No special attention for 32 and 64-bit return values. */
- tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
+ if (arg_slot_reg_p(0)) {
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
+ TCG_REG_CALL_STACK, ofs_slot0);
+ } else {
+ tcg_debug_assert(parm->ntmp != 0);
+ tcg_out_addi_ptr(s, parm->tmp[0],
+ TCG_REG_CALL_STACK, ofs_slot0);
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
+ TCG_REG_CALL_STACK, ofs_slot0);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
@@ -5374,11 +5673,18 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
bool load_sign,
const TCGLdstHelperParam *parm)
{
+ MemOp mop = get_memop(ldst->oi);
TCGMovExtend mov[2];
+ int ofs_slot0;
- if (ldst->type <= TCG_TYPE_REG) {
- MemOp mop = get_memop(ldst->oi);
+ switch (ldst->type) {
+ case TCG_TYPE_I64:
+ if (TCG_TARGET_REG_BITS == 32) {
+ break;
+ }
+ /* fall through */
+ case TCG_TYPE_I32:
mov[0].dst = ldst->datalo_reg;
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
mov[0].dst_type = ldst->type;
@@ -5404,25 +5710,49 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
mov[0].src_ext = mop & MO_SSIZE;
}
tcg_out_movext1(s, mov);
- } else {
- assert(TCG_TARGET_REG_BITS == 32);
-
- mov[0].dst = ldst->datalo_reg;
- mov[0].src =
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
- mov[0].dst_type = TCG_TYPE_I32;
- mov[0].src_type = TCG_TYPE_I32;
- mov[0].src_ext = MO_32;
+ return;
- mov[1].dst = ldst->datahi_reg;
- mov[1].src =
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
- mov[1].dst_type = TCG_TYPE_REG;
- mov[1].src_type = TCG_TYPE_REG;
- mov[1].src_ext = MO_32;
+ case TCG_TYPE_I128:
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
+ switch (TCG_TARGET_CALL_RET_I128) {
+ case TCG_CALL_RET_NORMAL:
+ break;
+ case TCG_CALL_RET_BY_VEC:
+ tcg_out_st(s, TCG_TYPE_V128,
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
+ TCG_REG_CALL_STACK, ofs_slot0);
+ /* fall through */
+ case TCG_CALL_RET_BY_REF:
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
+ return;
+ default:
+ g_assert_not_reached();
+ }
+ break;
- tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
+ default:
+ g_assert_not_reached();
}
+
+    /* Result spans two host registers: move each one whole (TCG_TYPE_REG). */
+    mov[0].dst = ldst->datalo_reg;
+    mov[0].src =
+        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
+    mov[0].dst_type = TCG_TYPE_REG;
+    mov[0].src_type = TCG_TYPE_REG;
+    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
+    mov[1].dst = ldst->datahi_reg;
+    mov[1].src =
+        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
+    mov[1].dst_type = TCG_TYPE_REG;
+    mov[1].src_type = TCG_TYPE_REG;
+    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
+
+    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
@@ -5446,6 +5776,10 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
info = &info_helper_st64_mmu;
data_type = TCG_TYPE_I64;
break;
+ case MO_128:
+ info = &info_helper_st128_mmu;
+ data_type = TCG_TYPE_I128;
+ break;
default:
g_assert_not_reached();
}
@@ -5456,20 +5790,74 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
/* Handle addr argument. */
loc = &info->in[next_arg];
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
- ldst->addrlo_reg, ldst->addrhi_reg);
- next_arg += n;
- nmov += n;
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
+ /*
+ * 32-bit host with 32-bit guest: zero-extend the guest address
+ * to 64-bits for the helper by storing the low part. Later,
+ * after we have processed the register inputs, we will load a
+ * zero for the high part.
+ */
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
+ TCG_TYPE_I32, TCG_TYPE_I32,
+ ldst->addrlo_reg, -1);
+ next_arg += 2;
+ nmov += 1;
+ } else {
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
+ ldst->addrlo_reg, ldst->addrhi_reg);
+ next_arg += n;
+ nmov += n;
+ }
/* Handle data argument. */
loc = &info->in[next_arg];
- n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
- ldst->datalo_reg, ldst->datahi_reg);
- next_arg += n;
- nmov += n;
- tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
+ switch (loc->kind) {
+ case TCG_CALL_ARG_NORMAL:
+ case TCG_CALL_ARG_EXTEND_U:
+ case TCG_CALL_ARG_EXTEND_S:
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
+ ldst->datalo_reg, ldst->datahi_reg);
+ next_arg += n;
+ nmov += n;
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
+ break;
+
+ case TCG_CALL_ARG_BY_REF:
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ tcg_debug_assert(data_type == TCG_TYPE_I128);
+ tcg_out_st(s, TCG_TYPE_I64,
+ HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
+ tcg_out_st(s, TCG_TYPE_I64,
+ HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
+
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
+
+ if (arg_slot_reg_p(loc->arg_slot)) {
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
+ TCG_REG_CALL_STACK,
+ arg_slot_stk_ofs(loc->ref_slot));
+ } else {
+ tcg_debug_assert(parm->ntmp != 0);
+ tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
+ arg_slot_stk_ofs(loc->ref_slot));
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
+ }
+ next_arg += 2;
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
+ /* Zero extend the address by loading a zero for the high part. */
+ loc = &info->in[1 + !HOST_BIG_ENDIAN];
+ tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
+ }
- tcg_out_helper_load_slots(s, nmov, mov, parm);
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
@@ -5582,7 +5970,7 @@ int64_t tcg_cpu_exec_time(void)
#endif
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
#ifdef CONFIG_PROFILER
TCGProfile *prof = &s->prof;
@@ -5743,13 +6131,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
}
num_insns++;
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
- target_ulong a;
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
-#else
- a = op->args[i];
-#endif
- s->gen_insn_data[num_insns][i] = a;
+ s->gen_insn_data[num_insns][i] =
+ tcg_get_insn_start_param(op, i);
}
break;
case INDEX_op_discard: