path: root/tcg
Diffstat (limited to 'tcg')
-rw-r--r--    tcg/arm/tcg-target.c      56
-rw-r--r--    tcg/arm/tcg-target.h       2
-rw-r--r--    tcg/ia64/tcg-target.c    194
-rw-r--r--    tcg/ia64/tcg-target.h     11
-rw-r--r--    tcg/optimize.c           471
-rw-r--r--    tcg/sparc/tcg-target.c   643
-rw-r--r--    tcg/sparc/tcg-target.h     9
-rw-r--r--    tcg/tcg-op.h              11
-rw-r--r--    tcg/tcg.c                 53
9 files changed, 943 insertions(+), 507 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 737200e5e6..e790bf04b4 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -467,6 +467,21 @@ static inline void tcg_out_movi32(TCGContext *s,
}
}
+static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
+ TCGArg lhs, TCGArg rhs, int rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rI" constraint.
+ */
+ if (rhs_is_const) {
+ int rot = encode_imm(rhs);
+ assert(rot >= 0);
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
static inline void tcg_out_mul32(TCGContext *s,
int cond, int rd, int rs, int rm)
{
@@ -1557,6 +1572,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_movi_i32:
tcg_out_movi32(s, COND_AL, args[0], args[1]);
break;
+ case INDEX_op_movcond_i32:
+ /* Constraints mean that v2 is always in the same register as dest,
+ * so we only need to do "if condition passed, move v1 to dest".
+ */
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
+ ARITH_MOV, args[0], 0, args[3], const_args[3]);
+ break;
case INDEX_op_add_i32:
c = ARITH_ADD;
goto gen_arith;
@@ -1576,14 +1600,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_EOR;
/* Fall through. */
gen_arith:
- if (const_args[2]) {
- int rot;
- rot = encode_imm(args[2]);
- tcg_out_dat_imm(s, COND_AL, c,
- args[0], args[1], rotl(args[2], rot) | (rot << 7));
- } else
- tcg_out_dat_reg(s, COND_AL, c,
- args[0], args[1], args[2], SHIFT_IMM_LSL(0));
+ tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
break;
case INDEX_op_add2_i32:
tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
@@ -1643,15 +1660,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_brcond_i32:
- if (const_args[1]) {
- int rot;
- rot = encode_imm(args[1]);
- tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
- args[0], rotl(args[1], rot) | (rot << 7));
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
- args[0], args[1], SHIFT_IMM_LSL(0));
- }
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[0], args[1], const_args[1]);
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
break;
case INDEX_op_brcond2_i32:
@@ -1670,15 +1680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
break;
case INDEX_op_setcond_i32:
- if (const_args[2]) {
- int rot;
- rot = encode_imm(args[2]);
- tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
- args[1], rotl(args[2], rot) | (rot << 7));
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
- args[1], args[2], SHIFT_IMM_LSL(0));
- }
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[1], args[2], const_args[2]);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
ARITH_MOV, args[0], 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1788,6 +1791,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_brcond_i32, { "r", "rI" } },
{ INDEX_op_setcond_i32, { "r", "r", "rI" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
/* TODO: "r", "r", "r", "r", "ri", "ri" */
{ INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
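
For readers following the new INDEX_op_movcond_i32 case above: because the "0" constraint forces v2 into the same register as the destination, the whole op reduces to one compare plus one predicated move. The helper below is purely illustrative (not backend API) and only states the select those two instructions compute:

#include <stdint.h>

/* Illustrative only: with dest pre-loaded with v2 via the "0" constraint,
 *     cmp     c1, c2
 *     mov<cc> dest, v1
 * leaves the following value in dest. */
static uint32_t movcond_i32_ref(int cond_holds, uint32_t v1, uint32_t v2)
{
    return cond_holds ? v1 : v2;
}
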
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 2bc7dff846..98fa11b286 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,7 +73,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
enum {
TCG_AREG0 = TCG_REG_R6,
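
Flipping TCG_TARGET_HAS_movcond_i32 to 1 lets the middle-end hand movcond_i32 straight to the backend instead of expanding the select itself. The generic expansion lives in tcg-op.h (touched by this commit but not shown in this excerpt); the sketch below only illustrates the branchless mask-and-merge idea such a fallback typically uses and should be read as an assumption, not the actual tcg-op.h code:

#include <stdint.h>

/* Assumed shape of a movcond fallback: turn a 0/1 setcond-style flag into
 * an all-zeros/all-ones mask and merge the two inputs with it. */
static uint32_t movcond_fallback(uint32_t flag, uint32_t v1, uint32_t v2)
{
    uint32_t mask = 0u - flag;           /* flag 0 or 1 -> 0 or 0xffffffff */
    return (v1 & mask) | (v2 & ~mask);   /* flag ? v1 : v2 */
}
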
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 705712f775..06570bea38 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -230,6 +230,8 @@ enum {
OPC_CMP4_LT_A6 = 0x18400000000ull,
OPC_CMP4_LTU_A6 = 0x1a400000000ull,
OPC_CMP4_EQ_A6 = 0x1c400000000ull,
+ OPC_DEP_I14 = 0x0ae00000000ull,
+ OPC_DEP_I15 = 0x08000000000ull,
OPC_DEP_Z_I12 = 0x0a600000000ull,
OPC_EXTR_I11 = 0x0a400002000ull,
OPC_EXTR_U_I11 = 0x0a400000000ull,
@@ -501,6 +503,30 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
| (qp & 0x3f);
}
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((imm & 0x01) << 36)
+ | ((len & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((pos & 0x3f) << 14)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((pos & 0x3f) << 31)
+ | ((len & 0x0f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
{
return opc
@@ -1312,6 +1338,37 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
}
+static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
+ TCGArg a2, int const_a2, int pos, int len)
+{
+ uint64_t i1 = 0, i2 = 0;
+ int cpos = 63 - pos, lm1 = len - 1;
+
+ if (const_a2) {
+ /* Truncate the value of a constant a2 to the width of the field. */
+ int mask = (1u << len) - 1;
+ a2 &= mask;
+
+ if (a2 == 0 || a2 == mask) {
+ /* 1-bit signed constant inserted into register. */
+ i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1);
+ } else {
+ /* Otherwise, load any constant into a temporary. Do this into
+ the first I slot to help out with cross-unit delays. */
+ i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+ TCG_REG_R2, a2, TCG_REG_R0);
+ a2 = TCG_REG_R2;
+ }
+ }
+ if (i2 == 0) {
+ i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
+ }
+ tcg_out_bundle(s, (i1 ? mII : miI),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ i2);
+}
+
static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
TCGArg arg2, int cmp4)
{
@@ -1404,21 +1461,47 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
}
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2,
+ TCGArg v1, int const_v1,
+ TCGArg v2, int const_v2, int cmp4)
+{
+ uint64_t opc1, opc2;
+
+ if (const_v1) {
+ opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+ } else if (ret == v1) {
+ opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+ } else {
+ opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+ }
+ if (const_v2) {
+ opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+ } else if (ret == v2) {
+ opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+ } else {
+ opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+ }
+
+ tcg_out_bundle(s, MmI,
+ tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+ opc1,
+ opc2);
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
/* Load and compare a TLB entry, and return the result in (p6, p7).
R2 is loaded with the address of the addend TLB entry.
- R56 is loaded with the address, zero extented on 32-bit targets. */
+ R57 is loaded with the address, zero extented on 32-bit targets. */
static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
int s_bits, uint64_t offset_rw,
uint64_t offset_addend)
{
tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
- TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- TCG_REG_R0),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1428,9 +1511,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
offset_rw, TCG_REG_R2),
#if TARGET_LONG_BITS == 32
- tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
#else
- tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
0, addr_reg),
#endif
tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1438,12 +1521,13 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
tcg_out_bundle(s, mII,
tcg_opc_m3 (TCG_REG_P0,
(TARGET_LONG_BITS == 32
- ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+ ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
TCG_REG_R2, offset_addend - offset_rw),
- tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
+ TCG_REG_R57, 63 - s_bits,
+ TARGET_PAGE_BITS - s_bits - 1),
tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
- TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+ TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
}
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1480,8 +1564,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* P6 is the fast path, and P7 the slow path */
tcg_out_bundle(s, mLX,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
- mem_index, TCG_REG_R0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+ TCG_REG_R56, 0, TCG_AREG0),
tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
(tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1489,7 +1573,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
TCG_REG_R2, 8),
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ TCG_REG_R3, TCG_REG_R57),
tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
TCG_REG_R3, 0));
if (bswap && s_bits == 1) {
@@ -1513,23 +1597,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
}
- /* XXX/FIXME: suboptimal */
- tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
- mem_index, TCG_REG_R0),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R57, 0, TCG_REG_R56),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R56, 0, TCG_AREG0));
if (!bswap || s_bits == 0) {
tcg_out_bundle(s, miB,
- tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+ mem_index, TCG_REG_R0),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
TCG_REG_B0, TCG_REG_B6));
} else {
tcg_out_bundle(s, miB,
- tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+ mem_index, TCG_REG_R0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R8, TCG_REG_R8, 0xb),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1581,8 +1659,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* P6 is the fast path, and P7 the slow path */
tcg_out_bundle(s, mLX,
- tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
- 0, data_reg),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+ TCG_REG_R56, 0, TCG_AREG0),
tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
(tcg_target_long) qemu_st_helpers[opc]));
@@ -1590,31 +1668,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
TCG_REG_R2, 8),
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ TCG_REG_R3, TCG_REG_R57),
tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
TCG_REG_R3, 0));
if (!bswap || opc == 0) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, mii,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
- tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
} else if (opc == 1) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
- TCG_REG_R2, data_reg, 15, 15),
+ TCG_REG_R2, data_reg, 15, 15));
+ tcg_out_bundle(s, miI,
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, TCG_REG_R2, 0xb));
data_reg = TCG_REG_R2;
} else if (opc == 2) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
- TCG_REG_R2, data_reg, 31, 31),
+ TCG_REG_R2, data_reg, 31, 31));
+ tcg_out_bundle(s, miI,
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, TCG_REG_R2, 0xb));
data_reg = TCG_REG_R2;
@@ -1622,25 +1711,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
- tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, data_reg, 0xb));
data_reg = TCG_REG_R2;
}
- /* XXX/FIXME: suboptimal */
- tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
- mem_index, TCG_REG_R0),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R58, 0, TCG_REG_R57),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R57, 0, TCG_REG_R56));
tcg_out_bundle(s, miB,
tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
data_reg, TCG_REG_R3),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R56, 0, TCG_AREG0),
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
+ mem_index, TCG_REG_R0),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
TCG_REG_B0, TCG_REG_B6));
}
@@ -2092,6 +2174,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_bswap64(s, args[0], args[1]);
break;
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ tcg_out_deposit(s, args[0], args[1], args[2], const_args[2],
+ args[3], args[4]);
+ break;
+
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], const_args[0],
args[1], const_args[1], args[3], 1);
@@ -2106,6 +2194,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_setcond_i64:
tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 1);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 0);
+ break;
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
@@ -2196,6 +2292,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_brcond_i32, { "rI", "rI" } },
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
@@ -2245,6 +2342,10 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_brcond_i64, { "rI", "rI" } },
{ INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
+
+ { INDEX_op_deposit_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_deposit_i64, { "r", "rZ", "ri" } },
{ INDEX_op_qemu_ld8u, { "r", "r" } },
{ INDEX_op_qemu_ld8s, { "r", "r" } },
@@ -2269,9 +2370,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
int frame_size;
/* reserve some stack space */
- frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+ frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+ CPU_TEMP_BUF_NLONGS * sizeof(long);
frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
/* First emit adhoc function descriptor */
*(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
@@ -2378,6 +2482,4 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
tcg_add_target_add_op_defs(ia64_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
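
The new tcg_out_deposit above maps TCG's deposit op onto the ia64 dep/dep.z forms, loading non-trivial constants into a temporary first. For reference, the operation itself is plain bit-field insertion; the helper below is illustrative and mirrors the constant-folding formula used in optimize.c:

#include <stdint.h>

/* deposit ret, a1, a2, pos, len: the low 'len' bits of a2 replace
 * bits [pos, pos+len) of a1. */
static uint64_t deposit64_ref(uint64_t a1, uint64_t a2, int pos, int len)
{
    uint64_t mask = (len >= 64) ? ~0ull : (1ull << len) - 1;
    return (a1 & ~(mask << pos)) | ((a2 & mask) << pos);
}
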
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index b7e01b25ae..91fe7a3b06 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,10 +131,13 @@ typedef enum {
#define TCG_TARGET_HAS_orc_i64 1
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_movcond_i32 0
-#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_deposit_i64 1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
+#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0ea90..a06c8eb43e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
return res;
}
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int32_t)x < (int32_t)y;
+ case TCG_COND_GE:
+ return (int32_t)x >= (int32_t)y;
+ case TCG_COND_LE:
+ return (int32_t)x <= (int32_t)y;
+ case TCG_COND_GT:
+ return (int32_t)x > (int32_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int64_t)x < (int64_t)y;
+ case TCG_COND_GE:
+ return (int64_t)x >= (int64_t)y;
+ case TCG_COND_LE:
+ return (int64_t)x <= (int64_t)y;
+ case TCG_COND_GT:
+ return (int64_t)x > (int64_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GT:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ case TCG_COND_GTU:
+ case TCG_COND_NE:
+ return 0;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_EQ:
+ return 1;
+ default:
+ tcg_abort();
+ }
+}
+
/* Return 2 if the condition can't be simplified, and the result
of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
switch (op_bits(op)) {
case 32:
- switch (c) {
- case TCG_COND_EQ:
- return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
- case TCG_COND_NE:
- return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
- case TCG_COND_LT:
- return (int32_t)temps[x].val < (int32_t)temps[y].val;
- case TCG_COND_GE:
- return (int32_t)temps[x].val >= (int32_t)temps[y].val;
- case TCG_COND_LE:
- return (int32_t)temps[x].val <= (int32_t)temps[y].val;
- case TCG_COND_GT:
- return (int32_t)temps[x].val > (int32_t)temps[y].val;
- case TCG_COND_LTU:
- return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
- case TCG_COND_GEU:
- return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
- case TCG_COND_LEU:
- return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
- case TCG_COND_GTU:
- return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
- default:
- break;
- }
- break;
+ return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
case 64:
- switch (c) {
- case TCG_COND_EQ:
- return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
- case TCG_COND_NE:
- return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
- case TCG_COND_LT:
- return (int64_t)temps[x].val < (int64_t)temps[y].val;
- case TCG_COND_GE:
- return (int64_t)temps[x].val >= (int64_t)temps[y].val;
- case TCG_COND_LE:
- return (int64_t)temps[x].val <= (int64_t)temps[y].val;
- case TCG_COND_GT:
- return (int64_t)temps[x].val > (int64_t)temps[y].val;
- case TCG_COND_LTU:
- return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
- case TCG_COND_GEU:
- return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
- case TCG_COND_LEU:
- return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
- case TCG_COND_GTU:
- return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
- default:
- break;
- }
- break;
- }
- } else if (temps_are_copies(x, y)) {
- switch (c) {
- case TCG_COND_GT:
- case TCG_COND_LTU:
- case TCG_COND_LT:
- case TCG_COND_GTU:
- case TCG_COND_NE:
- return 0;
- case TCG_COND_GE:
- case TCG_COND_GEU:
- case TCG_COND_LE:
- case TCG_COND_LEU:
- case TCG_COND_EQ:
- return 1;
+ return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
default:
- break;
+ tcg_abort();
}
+ } else if (temps_are_copies(x, y)) {
+ return do_constant_folding_cond_eq(c);
} else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
switch (c) {
case TCG_COND_LTU:
@@ -381,11 +396,73 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
} else {
return 2;
}
+}
- fprintf(stderr,
- "Unrecognized bitness %d or condition %d in "
- "do_constant_folding_cond.\n", op_bits(op), c);
- tcg_abort();
+/* Return 2 if the condition can't be simplified, and the result
+ of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+ TCGArg al = p1[0], ah = p1[1];
+ TCGArg bl = p2[0], bh = p2[1];
+
+ if (temps[bl].state == TCG_TEMP_CONST
+ && temps[bh].state == TCG_TEMP_CONST) {
+ uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+ if (temps[al].state == TCG_TEMP_CONST
+ && temps[ah].state == TCG_TEMP_CONST) {
+ uint64_t a;
+ a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+ return do_constant_folding_cond_64(a, b, c);
+ }
+ if (b == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ break;
+ }
+ }
+ }
+ if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+ return do_constant_folding_cond_eq(c);
+ }
+ return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+ TCGArg a1 = *p1, a2 = *p2;
+ int sum = 0;
+ sum += temps[a1].state == TCG_TEMP_CONST;
+ sum -= temps[a2].state == TCG_TEMP_CONST;
+
+ /* Prefer the constant in second argument, and then the form
+ op a, a, b, which is better handled on non-RISC hosts. */
+ if (sum > 0 || (sum == 0 && dest == a2)) {
+ *p1 = a2;
+ *p2 = a1;
+ return true;
+ }
+ return false;
+}
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+ int sum = 0;
+ sum += temps[p1[0]].state == TCG_TEMP_CONST;
+ sum += temps[p1[1]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+ if (sum > 0) {
+ TCGArg t;
+ t = p1[0], p1[0] = p2[0], p2[0] = t;
+ t = p1[1], p1[1] = p2[1], p2[1] = t;
+ return true;
+ }
+ return false;
}
/* Propagate constants and copies, fold constant expressions. */
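
The two helpers above centralize operand canonicalization for commutative ops: a constant migrates to the second slot, and on a tie the operand that aliases the destination goes first. A toy, self-contained illustration of that rule (the is_const array stands in for the optimizer's temps[] state and is purely illustrative):

#include <stdbool.h>

/* Prefer a constant as the second operand; otherwise prefer the form
 * "op a, a, b" when the destination matches the second operand. */
static bool swap_commutative_demo(int dest, int *p1, int *p2,
                                  const bool *is_const)
{
    int a1 = *p1, a2 = *p2;
    int sum = (is_const[a1] ? 1 : 0) - (is_const[a2] ? 1 : 0);

    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;    /* caller swaps or inverts the condition as needed */
    }
    return false;
}

With this, "add d, b, d" is rewritten as "add d, d, b", and a constant first operand of brcond/setcond moves to the second slot with the condition swapped.
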
@@ -397,7 +474,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
const TCGOpDef *def;
TCGArg *gen_args;
TCGArg tmp;
- TCGCond cond;
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +516,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
- /* Prefer the constant in second argument, and then the form
- op a, a, b, which is better handled on non-RISC hosts. */
- if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
- && temps[args[2]].state != TCG_TEMP_CONST)) {
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
- }
+ swap_commutative(args[0], &args[1], &args[2]);
break;
CASE_OP_32_64(brcond):
- if (temps[args[0]].state == TCG_TEMP_CONST
- && temps[args[1]].state != TCG_TEMP_CONST) {
- tmp = args[0];
- args[0] = args[1];
- args[1] = tmp;
+ if (swap_commutative(-1, &args[0], &args[1])) {
args[2] = tcg_swap_cond(args[2]);
}
break;
CASE_OP_32_64(setcond):
- if (temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[2]].state != TCG_TEMP_CONST) {
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
+ if (swap_commutative(args[0], &args[1], &args[2])) {
args[3] = tcg_swap_cond(args[3]);
}
break;
CASE_OP_32_64(movcond):
- cond = args[5];
- if (temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[2]].state != TCG_TEMP_CONST) {
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
- cond = tcg_swap_cond(cond);
+ if (swap_commutative(-1, &args[1], &args[2])) {
+ args[5] = tcg_swap_cond(args[5]);
}
/* For movcond, we canonicalize the "false" input reg to match
the destination reg so that the tcg backend can implement
a "move if true" operation. */
- if (args[0] == args[3]) {
- tmp = args[3];
- args[3] = args[4];
- args[4] = tmp;
- cond = tcg_invert_cond(cond);
+ if (swap_commutative(args[0], &args[4], &args[3])) {
+ args[5] = tcg_invert_cond(args[5]);
}
- args[5] = cond;
+ break;
+ case INDEX_op_add2_i32:
+ swap_commutative(args[0], &args[2], &args[4]);
+ swap_commutative(args[1], &args[3], &args[5]);
+ break;
+ case INDEX_op_mulu2_i32:
+ swap_commutative(args[0], &args[2], &args[3]);
+ break;
+ case INDEX_op_brcond2_i32:
+ if (swap_commutative2(&args[0], &args[2])) {
+ args[4] = tcg_swap_cond(args[4]);
+ }
+ break;
+ case INDEX_op_setcond2_i32:
+ if (swap_commutative2(&args[1], &args[3])) {
+ args[5] = tcg_swap_cond(args[5]);
+ }
+ break;
default:
break;
}
@@ -622,6 +692,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_args += 2;
args += 2;
break;
+
CASE_OP_32_64(not):
CASE_OP_32_64(neg):
CASE_OP_32_64(ext8s):
@@ -634,14 +705,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val, 0);
tcg_opt_gen_movi(gen_args, args[0], tmp);
- } else {
- reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
+ gen_args += 2;
+ args += 2;
+ break;
}
- gen_args += 2;
- args += 2;
- break;
+ goto do_default;
+
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
CASE_OP_32_64(mul):
@@ -665,15 +734,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
temps[args[2]].val);
tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
- } else {
- reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args += 3;
+ args += 3;
+ break;
}
- args += 3;
- break;
+ goto do_default;
+
CASE_OP_32_64(deposit):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
@@ -683,33 +748,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
| ((temps[args[2]].val & tmp) << args[3]);
tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
- } else {
- reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args[3] = args[3];
- gen_args[4] = args[4];
- gen_args += 5;
+ args += 5;
+ break;
}
- args += 5;
- break;
+ goto do_default;
+
CASE_OP_32_64(setcond):
tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
if (tmp != 2) {
gen_opc_buf[op_index] = op_to_movi(op);
tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
- } else {
- reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args[3] = args[3];
- gen_args += 4;
+ args += 4;
+ break;
}
- args += 4;
- break;
+ goto do_default;
+
CASE_OP_32_64(brcond):
tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
if (tmp != 2) {
@@ -721,17 +775,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else {
gen_opc_buf[op_index] = INDEX_op_nop;
}
- } else {
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- reset_temp(args[0]);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args[3] = args[3];
- gen_args += 4;
+ args += 4;
+ break;
}
- args += 4;
- break;
+ goto do_default;
+
CASE_OP_32_64(movcond):
tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
if (tmp != 2) {
@@ -746,18 +794,125 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
gen_args += 2;
}
+ args += 6;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[5]].state == TCG_TEMP_CONST) {
+ uint32_t al = temps[args[2]].val;
+ uint32_t ah = temps[args[3]].val;
+ uint32_t bl = temps[args[4]].val;
+ uint32_t bh = temps[args[5]].val;
+ uint64_t a = ((uint64_t)ah << 32) | al;
+ uint64_t b = ((uint64_t)bh << 32) | bl;
+ TCGArg rl, rh;
+
+ if (op == INDEX_op_add2_i32) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ /* We emit the extra nop when we emit the add2/sub2. */
+ assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+ rl = args[0];
+ rh = args[1];
+ gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+ tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+ gen_args += 4;
+ args += 6;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_mulu2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST) {
+ uint32_t a = temps[args[2]].val;
+ uint32_t b = temps[args[3]].val;
+ uint64_t r = (uint64_t)a * b;
+ TCGArg rl, rh;
+
+ /* We emit the extra nop when we emit the mulu2. */
+ assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+ rl = args[0];
+ rh = args[1];
+ gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+ tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+ gen_args += 4;
+ args += 4;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_brcond2_i32:
+ tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+ if (tmp != 2) {
+ if (tmp) {
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ gen_opc_buf[op_index] = INDEX_op_br;
+ gen_args[0] = args[5];
+ gen_args += 1;
+ } else {
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ }
+ } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0
+ && temps[args[3]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+ gen_args[0] = args[1];
+ gen_args[1] = args[3];
+ gen_args[2] = args[4];
+ gen_args[3] = args[5];
+ gen_args += 4;
} else {
- reset_temp(args[0]);
+ goto do_default;
+ }
+ args += 6;
+ break;
+
+ case INDEX_op_setcond2_i32:
+ tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+ if (tmp != 2) {
+ gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
+ gen_args += 2;
+ } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[3]].val == 0
+ && temps[args[4]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ gen_opc_buf[op_index] = INDEX_op_setcond_i32;
gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args[3] = args[3];
- gen_args[4] = args[4];
- gen_args[5] = args[5];
- gen_args += 6;
+ gen_args[1] = args[2];
+ gen_args[2] = args[4];
+ gen_args[3] = args[5];
+ gen_args += 4;
+ } else {
+ goto do_default;
}
args += 6;
break;
+
case INDEX_op_call:
nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -776,11 +931,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
i--;
}
break;
+
default:
- /* Default case: we do know nothing about operation so no
- propagation is done. We trash everything if the operation
- is the end of a basic block, otherwise we only trash the
- output args. */
+ do_default:
+ /* Default case: we know nothing about operation (or were unable
+ to compute the operation result) so no propagation is done.
+ We trash everything if the operation is the end of a basic
+ block, otherwise we only trash the output args. */
if (def->flags & TCG_OPF_BB_END) {
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
} else {
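
Summing up the new double-word handling in this file: when every 32-bit half feeding an add2/sub2 (or both inputs of a mulu2) is a known constant, the whole op is folded by gluing the halves into 64-bit values and splitting the result back into the two movi_i32 that replace it. A standalone sketch of that folding (names illustrative):

#include <stdint.h>

/* Fold add2/sub2 with all-constant inputs: one host-side 64-bit operation,
 * then split the result into the low and high movi values. */
static void fold_add2_demo(uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh,
                           int is_sub, uint32_t *rl, uint32_t *rh)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;
    uint64_t r = is_sub ? a - b : a + b;

    *rl = (uint32_t)r;
    *rh = (uint32_t)(r >> 32);
}

The brcond2/setcond2 shortcut for LT/GE against a zero/zero pair rests on the same recombination: the sign of a double-word value lives entirely in its high word, so one signed compare of the high part is enough.
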
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 0c32baa50e..f146647874 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -115,96 +115,6 @@ static const int tcg_target_call_oarg_regs[] = {
TCG_REG_O3,
};
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
- return (val << ((sizeof(tcg_target_long) * 8 - bits))
- >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-static inline int check_fit_i32(uint32_t val, unsigned int bits)
-{
- return ((val << (32 - bits)) >> (32 - bits)) == val;
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
- tcg_target_long value, tcg_target_long addend)
-{
- value += addend;
- switch (type) {
- case R_SPARC_32:
- if (value != (uint32_t)value)
- tcg_abort();
- *(uint32_t *)code_ptr = value;
- break;
- case R_SPARC_WDISP22:
- value -= (long)code_ptr;
- value >>= 2;
- if (!check_fit_tl(value, 22))
- tcg_abort();
- *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value;
- break;
- case R_SPARC_WDISP19:
- value -= (long)code_ptr;
- value >>= 2;
- if (!check_fit_tl(value, 19))
- tcg_abort();
- *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value;
- break;
- default:
- tcg_abort();
- }
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch (ct_str[0]) {
- case 'r':
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
- break;
- case 'L': /* qemu_ld/st constraint */
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
- // Helper args
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
- break;
- case 'I':
- ct->ct |= TCG_CT_CONST_S11;
- break;
- case 'J':
- ct->ct |= TCG_CT_CONST_S13;
- break;
- default:
- return -1;
- }
- ct_str++;
- *pct_str = ct_str;
- return 0;
-}
-
-/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
-{
- int ct;
-
- ct = arg_ct->ct;
- if (ct & TCG_CT_CONST)
- return 1;
- else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
- return 1;
- else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
- return 1;
- else
- return 0;
-}
-
#define INSN_OP(x) ((x) << 30)
#define INSN_OP2(x) ((x) << 22)
#define INSN_OP3(x) ((x) << 19)
@@ -214,12 +124,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define INSN_RS2(x) (x)
#define INSN_ASI(x) ((x) << 5)
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
+#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
-#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
+#define INSN_COND(x) ((x) << 25)
-#define INSN_COND(x, a) (((x) << 25) | ((a) << 29))
#define COND_N 0x0
#define COND_E 0x1
#define COND_LE 0x2
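
The new INSN_OFF16 macro encodes the displacement of the SPARC v9 BPr (branch on integer register condition) format, whose 16-bit word offset is split across two instruction fields. A small reference of the packing, equivalent to the macro since (x >> 2) >> 14 is the same as x >> 16 for the bits that matter:

#include <stdint.h>

/* d16lo occupies instruction bits 13:0, d16hi bits 21:20; the byte offset
 * is converted to a word offset first. */
static uint32_t insn_off16_ref(int32_t byte_off)
{
    uint32_t word_off = (uint32_t)byte_off >> 2;
    return (word_off & 0x3fff) | (((word_off >> 14) & 3) << 20);
}
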
@@ -236,11 +147,26 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define COND_CC 0xd
#define COND_POS 0xe
#define COND_VC 0xf
-#define BA (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
+#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
+
+#define RCOND_Z 1
+#define RCOND_LEZ 2
+#define RCOND_LZ 3
+#define RCOND_NZ 5
+#define RCOND_GZ 6
+#define RCOND_GEZ 7
#define MOVCC_ICC (1 << 18)
#define MOVCC_XCC (1 << 18 | 1 << 12)
+#define BPCC_ICC 0
+#define BPCC_XCC (2 << 20)
+#define BPCC_PT (1 << 19)
+#define BPCC_PN 0
+#define BPCC_A (1 << 29)
+
+#define BPR_PT BPCC_PT
+
#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
@@ -260,6 +186,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
@@ -313,6 +240,109 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
+{
+ return (val << ((sizeof(tcg_target_long) * 8 - bits))
+ >> (sizeof(tcg_target_long) * 8 - bits)) == val;
+}
+
+static inline int check_fit_i32(uint32_t val, unsigned int bits)
+{
+ return ((val << (32 - bits)) >> (32 - bits)) == val;
+}
+
+static void patch_reloc(uint8_t *code_ptr, int type,
+ tcg_target_long value, tcg_target_long addend)
+{
+ uint32_t insn;
+ value += addend;
+ switch (type) {
+ case R_SPARC_32:
+ if (value != (uint32_t)value) {
+ tcg_abort();
+ }
+ *(uint32_t *)code_ptr = value;
+ break;
+ case R_SPARC_WDISP16:
+ value -= (long)code_ptr;
+ if (!check_fit_tl(value >> 2, 16)) {
+ tcg_abort();
+ }
+ insn = *(uint32_t *)code_ptr;
+ insn &= ~INSN_OFF16(-1);
+ insn |= INSN_OFF16(value);
+ *(uint32_t *)code_ptr = insn;
+ break;
+ case R_SPARC_WDISP19:
+ value -= (long)code_ptr;
+ if (!check_fit_tl(value >> 2, 19)) {
+ tcg_abort();
+ }
+ insn = *(uint32_t *)code_ptr;
+ insn &= ~INSN_OFF19(-1);
+ insn |= INSN_OFF19(value);
+ *(uint32_t *)code_ptr = insn;
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ break;
+ case 'L': /* qemu_ld/st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ // Helper args
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S11;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_S13;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
int op)
{
@@ -337,7 +367,9 @@ static void tcg_out_arithc(TCGContext *s, int rd, int rs1,
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
- tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ if (ret != arg) {
+ tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ }
}
static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg)
@@ -479,39 +511,6 @@ static inline void tcg_out_nop(TCGContext *s)
tcg_out_sethi(s, TCG_REG_G0, 0);
}
-static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
-{
- TCGLabel *l = &s->labels[label_index];
- uint32_t off22;
-
- if (l->has_value) {
- off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
- } else {
- /* Make sure to preserve destinations during retranslation. */
- off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
- tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
- }
- tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
-{
- TCGLabel *l = &s->labels[label_index];
- uint32_t off19;
-
- if (l->has_value) {
- off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
- } else {
- /* Make sure to preserve destinations during retranslation. */
- off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
- tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
- }
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | off19));
-}
-#endif
-
static const uint8_t tcg_cond_to_bcond[] = {
[TCG_COND_EQ] = COND_E,
[TCG_COND_NE] = COND_NE,
@@ -525,70 +524,144 @@ static const uint8_t tcg_cond_to_bcond[] = {
[TCG_COND_GTU] = COND_GU,
};
+static const uint8_t tcg_cond_to_rcond[] = {
+ [TCG_COND_EQ] = RCOND_Z,
+ [TCG_COND_NE] = RCOND_NZ,
+ [TCG_COND_LT] = RCOND_LZ,
+ [TCG_COND_GT] = RCOND_GZ,
+ [TCG_COND_LE] = RCOND_LEZ,
+ [TCG_COND_GE] = RCOND_GEZ
+};
+
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
+{
+ TCGLabel *l = &s->labels[label];
+ int off19;
+
+ if (l->has_value) {
+ off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
+ }
+ tcg_out_bpcc0(s, scond, flags, off19);
+}
+
static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
{
tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
}
-static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int label)
{
tcg_out_cmp(s, arg1, arg2, const_arg2);
- tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
tcg_out_nop(s);
}
+static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret,
+ TCGArg v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
+ | INSN_RS1(tcg_cond_to_bcond[cond])
+ | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
+}
+
#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int label)
{
- tcg_out_cmp(s, arg1, arg2, const_arg2);
- tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
+ if (arg2 == 0 && !is_unsigned_cond(cond)) {
+ TCGLabel *l = &s->labels[label];
+ int off16;
+
+ if (l->has_value) {
+ off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr);
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0);
+ }
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
+ | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+ } else {
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
+ }
tcg_out_nop(s);
}
+
+static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1,
+ TCGArg v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
+ | (tcg_cond_to_rcond[cond] << 10)
+ | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare.
+ Note that the immediate range is one bit smaller, so we must check
+ for that as well. */
+ if (c2 == 0 && !is_unsigned_cond(cond)
+ && (!v1const || check_fit_tl(v1, 10))) {
+ tcg_out_movr(s, cond, ret, c1, v1, v1const);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
+ }
+}
#else
static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
TCGArg al, TCGArg ah,
TCGArg bl, int blconst,
TCGArg bh, int bhconst, int label_dest)
{
- int cc, label_next = gen_new_label();
+ int scond, label_next = gen_new_label();
tcg_out_cmp(s, ah, bh, bhconst);
/* Note that we fill one of the delay slots with the second compare. */
switch (cond) {
case TCG_COND_EQ:
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_next);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
tcg_out_cmp(s, al, bl, blconst);
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_EQ], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
break;
case TCG_COND_NE:
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
tcg_out_cmp(s, al, bl, blconst);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
break;
default:
- /* ??? One could fairly easily special-case 64-bit unsigned
- compares against 32-bit zero-extended constants. For instance,
- we know that (unsigned)AH < 0 is false and need not emit it.
- Similarly, (unsigned)AH > 0 being true implies AH != 0, so the
- second branch will never be taken. */
- cc = INSN_COND(tcg_cond_to_bcond[cond], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
+ tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
tcg_out_nop(s);
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_next);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
tcg_out_cmp(s, al, bl, blconst);
- cc = INSN_COND(tcg_cond_to_bcond[tcg_unsigned_cond(cond)], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
+ tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
break;
}
tcg_out_nop(s);
@@ -600,39 +673,42 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg c1, TCGArg c2, int c2const)
{
- TCGArg t;
-
/* For 32-bit comparisons, we can play games with ADDX/SUBX. */
switch (cond) {
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ /* The result of the comparison is in the carry bit. */
+ break;
+
case TCG_COND_EQ:
case TCG_COND_NE:
+ /* For equality, we can transform to inequality vs zero. */
if (c2 != 0) {
tcg_out_arithc(s, ret, c1, c2, c2const, ARITH_XOR);
}
c1 = TCG_REG_G0, c2 = ret, c2const = 0;
- cond = (cond == TCG_COND_EQ ? TCG_COND_LEU : TCG_COND_LTU);
+ cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
break;
case TCG_COND_GTU:
- case TCG_COND_GEU:
- if (c2const && c2 != 0) {
- tcg_out_movi_imm13(s, TCG_REG_T1, c2);
- c2 = TCG_REG_T1;
- }
- t = c1, c1 = c2, c2 = t, c2const = 0;
- cond = tcg_swap_cond(cond);
- break;
-
- case TCG_COND_LTU:
case TCG_COND_LEU:
- break;
+ /* If we don't need to load a constant into a register, we can
+ swap the operands on GTU/LEU. There's no benefit to loading
+ the constant into a temporary register. */
+ if (!c2const || c2 == 0) {
+ TCGArg t = c1;
+ c1 = c2;
+ c2 = t;
+ c2const = 0;
+ cond = tcg_swap_cond(cond);
+ break;
+ }
+ /* FALLTHRU */
default:
tcg_out_cmp(s, c1, c2, c2const);
tcg_out_movi_imm13(s, ret, 0);
- tcg_out32(s, ARITH_MOVCC | INSN_RD(ret)
- | INSN_RS1(tcg_cond_to_bcond[cond])
- | MOVCC_ICC | INSN_IMM11(1));
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
return;
}
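
The reworked setcond_i32 leans on the carry flag: LTU/GEU fall directly out of SUBcc, and EQ/NE against an arbitrary value are first xor'ed down to a compare of %g0 against the xor result, which sets carry exactly when the operands differ. The tail of the function that materializes the flag (presumably a single addx/subx) lies outside this hunk; the helper below only checks the mapping of EQ to GEU and NE to LTU and is illustrative:

#include <stdint.h>

/* After "subcc %g0, t" the carry (borrow) bit is set iff t != 0, so
 * NE becomes LTU (carry set) and EQ becomes GEU (carry clear). */
static int setcond_eq_ref(uint32_t c1, uint32_t c2, int want_eq)
{
    uint32_t t = c1 ^ c2;      /* zero iff c1 == c2 */
    int carry = (0u < t);      /* borrow out of 0 - t */
    return want_eq ? !carry : carry;
}
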
@@ -648,11 +724,16 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg c1, TCGArg c2, int c2const)
{
- tcg_out_cmp(s, c1, c2, c2const);
- tcg_out_movi_imm13(s, ret, 0);
- tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
- | INSN_RS1(tcg_cond_to_bcond[cond])
- | MOVCC_XCC | INSN_IMM11(1));
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare
+ if the input does not overlap the output. */
+ if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movr(s, cond, ret, c1, 1, 1);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
+ }
}
#else
static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -660,35 +741,56 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg bl, int blconst,
TCGArg bh, int bhconst)
{
- int lab;
+ int tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (ret != ah && (bhconst || ret != bh)) {
+ tmp = ret;
+ }
switch (cond) {
case TCG_COND_EQ:
- tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst);
- tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND);
- break;
-
case TCG_COND_NE:
- tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst);
- tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR);
+ if (bl == 0 && bh == 0) {
+ if (cond == TCG_COND_EQ) {
+ tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC);
+ tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
+ } else {
+ tcg_out_arith(s, ret, al, ah, ARITH_ORCC);
+ }
+ } else {
+ tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst);
+ tcg_out_cmp(s, ah, bh, bhconst);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+ }
+ tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1);
break;
default:
- lab = gen_new_label();
-
+ /* <= : ah < bh | (ah == bh && al <= bl) */
+ tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst);
tcg_out_cmp(s, ah, bh, bhconst);
- tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), lab);
- tcg_out_movi_imm13(s, ret, 1);
- tcg_out_branch_i32(s, INSN_COND(COND_NE, 1), lab);
- tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+ tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1);
+ tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1);
+ break;
+ }
+}
- tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), ret, al, bl, blconst);
+static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh,
+ TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+ TCGArg bh, int bhconst, int opl, int oph)
+{
+ TCGArg tmp = TCG_REG_T1;
- tcg_out_label(s, lab, s->code_ptr);
- break;
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
}
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, opl);
+ tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
+ tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
}
#endif
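
tcg_out_addsub2 above is the usual carry-chain split of a 64-bit add or subtract over 32-bit registers: the low halves go through ADDCC/SUBCC to produce the carry, the high halves through ADDX/SUBX to consume it, and the scratch register is needed only when the low result would clobber a high input before it is read. A reference of the add case (illustrative):

#include <stdint.h>

/* addcc on the low words sets the carry; addx on the high words adds it
 * back in. */
static void add2_ref(uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh,
                     uint32_t *rl, uint32_t *rh)
{
    uint32_t lo = al + bl;        /* addcc */
    uint32_t carry = lo < al;     /* carry out of the low-word add */

    *rl = lo;
    *rh = ah + bh + carry;        /* addx */
}
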
@@ -859,8 +961,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
/* bne,pn %[xi]cc, label0 */
label_ptr[0] = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
- | ((TARGET_LONG_BITS == 64) << 21)));
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
/* TLB Hit. */
/* Load all 64-bits into an O/G register. */
@@ -875,8 +977,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
/* b,a,pt label1 */
label_ptr[1] = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
- | (1 << 29) | (1 << 19)));
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
} else {
/* The fast path is exactly one insn. Thus we can perform the
entire TLB Hit in the (annulled) delay slot of the branch
@@ -885,9 +986,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
/* beq,a,pt %[xi]cc, label0 */
label_ptr[0] = NULL;
label_ptr[1] = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
- | ((TARGET_LONG_BITS == 64) << 21)
- | (1 << 29) | (1 << 19)));
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
/* delay slot */
tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
}
@@ -976,7 +1076,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
{
int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
- int memi_idx, memi, n;
+ int memi_idx, memi, n, datafull;
uint32_t *label_ptr;
#endif
@@ -993,23 +1093,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
offsetof(CPUTLBEntry, addr_write));
+ datafull = datalo;
if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
/* Reconstruct the full 64-bit value. */
tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
- datalo = TCG_REG_O2;
+ datafull = TCG_REG_O2;
}
/* The fast path is exactly one insn. Thus we can perform the entire
TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
/* beq,a,pt %[xi]cc, label0 */
label_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
- | ((TARGET_LONG_BITS == 64) << 21)
- | (1 << 29) | (1 << 19)));
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
/* delay slot */
- tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+ tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
/* TLB Miss. */
@@ -1098,7 +1198,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_nop(s);
break;
case INDEX_op_br:
- tcg_out_branch_i32(s, COND_A, args[0]);
+ tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);
tcg_out_nop(s);
break;
case INDEX_op_movi_i32:
@@ -1211,6 +1311,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_setcond_i32(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond_i32(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
+ break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
@@ -1224,16 +1328,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[4], const_args[4]);
break;
case INDEX_op_add2_i32:
- tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
- ARITH_ADDCC);
- tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
- ARITH_ADDX);
+ tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_ADDCC, ARITH_ADDX);
break;
case INDEX_op_sub2_i32:
- tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
- ARITH_SUBCC);
- tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
- ARITH_SUBX);
+ tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_SUBCC, ARITH_SUBX);
break;
case INDEX_op_mulu2_i32:
tcg_out_arithc(s, args[0], args[2], args[3], const_args[3],
@@ -1346,7 +1448,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_setcond_i64(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
-
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond_i64(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
+ break;
#endif
gen_arith:
tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
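
The new movcond cases pass args[5] (the condition) and args[3] (v1, possibly constant) to the tcg_out_movcond_* helpers defined earlier in this diff. They rely on the "0" constraint in the operand table below, which forces v2 into the output register, so only a compare plus one conditional move of v1 is needed. A plain C model of that contract (illustration, not the emitted SPARC code):

    #include <stdint.h>

    /* dest already holds v2 (constraint "0"); a single conditional move
     * overwrites it with v1 when the comparison succeeds. */
    static uint32_t movcond_model(int cond_passed, uint32_t v1, uint32_t v2)
    {
        uint32_t dest = v2;
        if (cond_passed) {
            dest = v1;
        }
        return dest;
    }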
@@ -1375,39 +1480,40 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ld16u_i32, { "r", "r" } },
{ INDEX_op_ld16s_i32, { "r", "r" } },
{ INDEX_op_ld_i32, { "r", "r" } },
- { INDEX_op_st8_i32, { "r", "r" } },
- { INDEX_op_st16_i32, { "r", "r" } },
- { INDEX_op_st_i32, { "r", "r" } },
-
- { INDEX_op_add_i32, { "r", "r", "rJ" } },
- { INDEX_op_mul_i32, { "r", "r", "rJ" } },
- { INDEX_op_div_i32, { "r", "r", "rJ" } },
- { INDEX_op_divu_i32, { "r", "r", "rJ" } },
- { INDEX_op_rem_i32, { "r", "r", "rJ" } },
- { INDEX_op_remu_i32, { "r", "r", "rJ" } },
- { INDEX_op_sub_i32, { "r", "r", "rJ" } },
- { INDEX_op_and_i32, { "r", "r", "rJ" } },
- { INDEX_op_andc_i32, { "r", "r", "rJ" } },
- { INDEX_op_or_i32, { "r", "r", "rJ" } },
- { INDEX_op_orc_i32, { "r", "r", "rJ" } },
- { INDEX_op_xor_i32, { "r", "r", "rJ" } },
-
- { INDEX_op_shl_i32, { "r", "r", "rJ" } },
- { INDEX_op_shr_i32, { "r", "r", "rJ" } },
- { INDEX_op_sar_i32, { "r", "r", "rJ" } },
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_rem_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_remu_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
{ INDEX_op_neg_i32, { "r", "rJ" } },
{ INDEX_op_not_i32, { "r", "rJ" } },
- { INDEX_op_brcond_i32, { "r", "rJ" } },
- { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+ { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+ { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
#if TCG_TARGET_REG_BITS == 32
- { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
- { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_add2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_sub2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
+ { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
#endif
#if TCG_TARGET_REG_BITS == 64
@@ -1420,27 +1526,27 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ld32u_i64, { "r", "r" } },
{ INDEX_op_ld32s_i64, { "r", "r" } },
{ INDEX_op_ld_i64, { "r", "r" } },
- { INDEX_op_st8_i64, { "r", "r" } },
- { INDEX_op_st16_i64, { "r", "r" } },
- { INDEX_op_st32_i64, { "r", "r" } },
- { INDEX_op_st_i64, { "r", "r" } },
-
- { INDEX_op_add_i64, { "r", "r", "rJ" } },
- { INDEX_op_mul_i64, { "r", "r", "rJ" } },
- { INDEX_op_div_i64, { "r", "r", "rJ" } },
- { INDEX_op_divu_i64, { "r", "r", "rJ" } },
- { INDEX_op_rem_i64, { "r", "r", "rJ" } },
- { INDEX_op_remu_i64, { "r", "r", "rJ" } },
- { INDEX_op_sub_i64, { "r", "r", "rJ" } },
- { INDEX_op_and_i64, { "r", "r", "rJ" } },
- { INDEX_op_andc_i64, { "r", "r", "rJ" } },
- { INDEX_op_or_i64, { "r", "r", "rJ" } },
- { INDEX_op_orc_i64, { "r", "r", "rJ" } },
- { INDEX_op_xor_i64, { "r", "r", "rJ" } },
-
- { INDEX_op_shl_i64, { "r", "r", "rJ" } },
- { INDEX_op_shr_i64, { "r", "r", "rJ" } },
- { INDEX_op_sar_i64, { "r", "r", "rJ" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_div_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_divu_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_rem_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_remu_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_sub_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_and_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_andc_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_or_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_orc_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_xor_i64, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_shl_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_shr_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_sar_i64, { "r", "rZ", "rJ" } },
{ INDEX_op_neg_i64, { "r", "rJ" } },
{ INDEX_op_not_i64, { "r", "rJ" } },
@@ -1448,8 +1554,9 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ext32s_i64, { "r", "ri" } },
{ INDEX_op_ext32u_i64, { "r", "ri" } },
- { INDEX_op_brcond_i64, { "r", "rJ" } },
- { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+ { INDEX_op_brcond_i64, { "rZ", "rJ" } },
+ { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } },
#endif
#if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4a17f1e118..0e7d398a41 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -62,8 +62,9 @@ typedef enum {
TCG_REG_I7,
} TCGReg;
-#define TCG_CT_CONST_S11 0x100
-#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_S11 0x100
+#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_ZERO 0x400
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_O6
@@ -99,7 +100,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
@@ -121,7 +122,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
#endif
#define TCG_AREG0 TCG_REG_I0
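
The new TCG_CT_CONST_ZERO flag backs the "Z" constraint letter used throughout the updated operand tables: a constant zero operand is accepted and can then be encoded as the hardwired %g0 register instead of first being materialised into a scratch register. "I" and "J" correspond to the existing S11/S13 flags (11- and 13-bit signed immediates). A standalone sketch of the matching logic; the letter-to-flag mapping is an assumption here, since the real parse/match code is outside this excerpt:

    /* Sketch of constant-constraint matching for the letters used above. */
    #define TCG_CT_CONST_S11  0x100   /* "I": 11-bit signed immediate */
    #define TCG_CT_CONST_S13  0x200   /* "J": 13-bit signed immediate */
    #define TCG_CT_CONST_ZERO 0x400   /* "Z": the constant zero -> %g0 */

    static int check_fits(long val, int bits)
    {
        long mask = ~((1L << (bits - 1)) - 1);
        return (val & mask) == 0 || (val & mask) == mask;
    }

    static int const_ok_for_ct(long val, int ct)
    {
        return ((ct & TCG_CT_CONST_ZERO) && val == 0)
            || ((ct & TCG_CT_CONST_S11) && check_fits(val, 11))
            || ((ct & TCG_CT_CONST_S13) && check_fits(val, 13));
    }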
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 551845801d..8100a5a2e0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
int gen_new_label(void);
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+ *gen_opc_ptr++ = opc;
+}
+
static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
{
*gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
TCGV_HIGH(arg2));
+ /* Allow the optimizer room to replace add2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
}
static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
TCGV_HIGH(arg2));
+ /* Allow the optimizer room to replace sub2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
}
static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1018,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
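
Each nop emitted above reserves a spare slot in the opcode stream directly behind the double-word operation. The liveness change in tcg/tcg.c below depends on it (hence the assert that the following opcode really is a nop): when the high half of the result turns out to be dead, the op can be narrowed in place and the now-surplus parameters parked behind the recycled nop, keeping the opcode and parameter streams in step. Illustrative layout only, not actual dump output:

    opcode stream:    ...  add2_i32             nop     ...
    param  stream:    ...  rl rh al ah bl bh    (none)  ...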
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a171f784a4..5faaca5ebf 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1303,8 +1303,58 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_end:
break;
- /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ args -= 6;
+ nb_iargs = 4;
+ nb_oargs = 2;
+ /* Test if the high part of the operation is dead, but not
+ the low part. The result can be optimized to a simple
+ add or sub. This happens often for an x86_64 guest when
+ the CPU is running in 32-bit mode. */
+ if (dead_temps[args[1]]) {
+ if (dead_temps[args[0]]) {
+ goto do_remove;
+ }
+ /* Create the single operation plus nop. */
+ if (op == INDEX_op_add2_i32) {
+ op = INDEX_op_add_i32;
+ } else {
+ op = INDEX_op_sub_i32;
+ }
+ gen_opc_buf[op_index] = op;
+ args[1] = args[2];
+ args[2] = args[4];
+ assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+ /* Fall through and mark the single-word operation live. */
+ nb_iargs = 2;
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
+ case INDEX_op_mulu2_i32:
+ args -= 4;
+ nb_iargs = 2;
+ nb_oargs = 2;
+ /* Likewise, test whether the high part of the operation is dead. */
+ if (dead_temps[args[1]]) {
+ if (dead_temps[args[0]]) {
+ goto do_remove;
+ }
+ gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+ args[1] = args[2];
+ args[2] = args[3];
+ assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+ /* Fall through and mark the single-word operation live. */
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
default:
+ /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
args -= def->nb_args;
nb_iargs = def->nb_iargs;
nb_oargs = def->nb_oargs;
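
As a concrete illustration of the rewrite above (hypothetical temporaries, shown in TCG-op notation rather than real dump output): when only the low half t0 of an add2 result stays live, the in-place edits args[1] = args[2] and args[2] = args[4] turn

    add2_i32 t0, t1, a0, a1, b0, b1
    nop

into

    add_i32  t0, a0, b0
    nop        (rewritten by tcg_set_nop to account for the 3 leftover params)

The mulu2 case does the same with a one-parameter nop.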
@@ -1318,6 +1368,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (!dead_temps[arg])
goto do_not_remove;
}
+ do_remove:
tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
s->del_op_count++;