Diffstat (limited to 'tcg')
 tcg/arm/tcg-target.c   |  56
 tcg/arm/tcg-target.h   |   2
 tcg/ia64/tcg-target.c  | 194
 tcg/ia64/tcg-target.h  |  11
 tcg/optimize.c         | 471
 tcg/sparc/tcg-target.c | 643
 tcg/sparc/tcg-target.h |   9
 tcg/tcg-op.h           |  11
 tcg/tcg.c              |  53
9 files changed, 943 insertions(+), 507 deletions(-)
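The common thread of these changes is the movcond (conditional move) opcode, newly wired up in the ARM, ia64 and SPARC backends and taught to the optimizer. As a reference for the hunks below, its semantics can be sketched as follows (movcond_ref is a hypothetical helper for illustration only; do_constant_folding_cond_64 is the routine this patch adds to tcg/optimize.c):

    /* movcond ret, c1, c2, v1, v2, cond  =>  ret = (c1 cond c2) ? v1 : v2 */
    static uint64_t movcond_ref(uint64_t c1, uint64_t c2,
                                uint64_t v1, uint64_t v2, TCGCond cond)
    {
        return do_constant_folding_cond_64(c1, c2, cond) ? v1 : v2;
    }
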
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 737200e5e6..e790bf04b4 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -467,6 +467,21 @@ static inline void tcg_out_movi32(TCGContext *s, } } +static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, + TCGArg lhs, TCGArg rhs, int rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rI" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + assert(rot >= 0); + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + static inline void tcg_out_mul32(TCGContext *s, int cond, int rd, int rs, int rm) { @@ -1557,6 +1572,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_movi_i32: tcg_out_movi32(s, COND_AL, args[0], args[1]); break; + case INDEX_op_movcond_i32: + /* Constraints mean that v2 is always in the same register as dest, + * so we only need to do "if condition passed, move v1 to dest". + */ + tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]], + ARITH_MOV, args[0], 0, args[3], const_args[3]); + break; case INDEX_op_add_i32: c = ARITH_ADD; goto gen_arith; @@ -1576,14 +1600,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, c = ARITH_EOR; /* Fall through. */ gen_arith: - if (const_args[2]) { - int rot; - rot = encode_imm(args[2]); - tcg_out_dat_imm(s, COND_AL, c, - args[0], args[1], rotl(args[2], rot) | (rot << 7)); - } else - tcg_out_dat_reg(s, COND_AL, c, - args[0], args[1], args[2], SHIFT_IMM_LSL(0)); + tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_add2_i32: tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC, @@ -1643,15 +1660,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_brcond_i32: - if (const_args[1]) { - int rot; - rot = encode_imm(args[1]); - tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, - args[0], rotl(args[1], rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, - args[0], args[1], SHIFT_IMM_LSL(0)); - } + tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, + args[0], args[1], const_args[1]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]); break; case INDEX_op_brcond2_i32: @@ -1670,15 +1680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]); break; case INDEX_op_setcond_i32: - if (const_args[2]) { - int rot; - rot = encode_imm(args[2]); - tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, - args[1], rotl(args[2], rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, - args[1], args[2], SHIFT_IMM_LSL(0)); - } + tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, + args[1], args[2], const_args[2]); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], ARITH_MOV, args[0], 0, 1); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], @@ -1788,6 +1791,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_brcond_i32, { "r", "rI" } }, { INDEX_op_setcond_i32, { "r", "r", "rI" } }, + { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } }, /* TODO: "r", "r", "r", "r", "ri", "ri" */ { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } }, diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 2bc7dff846..98fa11b286 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -73,7 +73,7 @@ 
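The tcg_out_dat_rI helper above asserts that encode_imm() succeeds for any constant matching the "rI" constraint. A minimal sketch of that validity test, assuming the usual ARM rule that a data-processing immediate must be an 8-bit value rotated right by an even amount (example_encode_imm is a hypothetical stand-in, not the backend's actual function):

    #include <stdint.h>

    static int example_encode_imm(uint32_t imm)
    {
        int rot;
        /* Encodable iff some even left-rotation of imm fits in 8 bits. */
        for (rot = 0; rot < 32; rot += 2) {
            uint32_t v = (imm << rot) | (rot ? imm >> (32 - rot) : 0);
            if (v <= 0xff) {
                return rot;  /* (rot << 7) puts rot/2 into insn bits 11:8 */
            }
        }
        return -1;           /* caller falls back to the reg,reg form */
    }
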
typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 enum { TCG_AREG0 = TCG_REG_R6, diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 705712f775..06570bea38 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -230,6 +230,8 @@ enum { OPC_CMP4_LT_A6 = 0x18400000000ull, OPC_CMP4_LTU_A6 = 0x1a400000000ull, OPC_CMP4_EQ_A6 = 0x1c400000000ull, + OPC_DEP_I14 = 0x0ae00000000ull, + OPC_DEP_I15 = 0x08000000000ull, OPC_DEP_Z_I12 = 0x0a600000000ull, OPC_EXTR_I11 = 0x0a400002000ull, OPC_EXTR_U_I11 = 0x0a400000000ull, @@ -501,6 +503,30 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, | (qp & 0x3f); } +static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((imm & 0x01) << 36) + | ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((pos & 0x3f) << 31) + | ((len & 0x0f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) { return opc @@ -1312,6 +1338,37 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb)); } +static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, + TCGArg a2, int const_a2, int pos, int len) +{ + uint64_t i1 = 0, i2 = 0; + int cpos = 63 - pos, lm1 = len - 1; + + if (const_a2) { + /* Truncate the value of a constant a2 to the width of the field. */ + int mask = (1u << len) - 1; + a2 &= mask; + + if (a2 == 0 || a2 == mask) { + /* 1-bit signed constant inserted into register. */ + i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); + } else { + /* Otherwise, load any constant into a temporary. Do this into + the first I slot to help out with cross-unit delays. */ + i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, + TCG_REG_R2, a2, TCG_REG_R0); + a2 = TCG_REG_R2; + } + } + if (i2 == 0) { + i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); + } + tcg_out_bundle(s, (i1 ? mII : miI), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + i1 ? 
i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + i2); +} + static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, TCGArg arg2, int cmp4) { @@ -1404,21 +1461,47 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0)); } +static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg c1, TCGArg c2, + TCGArg v1, int const_v1, + TCGArg v2, int const_v2, int cmp4) +{ + uint64_t opc1, opc2; + + if (const_v1) { + opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0); + } else if (ret == v1) { + opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + } else { + opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1); + } + if (const_v2) { + opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0); + } else if (ret == v2) { + opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + } else { + opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2); + } + + tcg_out_bundle(s, MmI, + tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4), + opc1, + opc2); +} + #if defined(CONFIG_SOFTMMU) #include "../../softmmu_defs.h" /* Load and compare a TLB entry, and return the result in (p6, p7). R2 is loaded with the address of the addend TLB entry. - R56 is loaded with the address, zero extented on 32-bit targets. */ + R57 is loaded with the address, zero extented on 32-bit targets. */ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, int s_bits, uint64_t offset_rw, uint64_t offset_addend) { tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), - TCG_REG_R0), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, @@ -1428,9 +1511,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, offset_rw, TCG_REG_R2), #if TARGET_LONG_BITS == 32 - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg), #else - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57, 0, addr_reg), #endif tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, @@ -1438,12 +1521,13 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, tcg_out_bundle(s, mII, tcg_opc_m3 (TCG_REG_P0, (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57, + ? 
OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56, TCG_REG_R2, offset_addend - offset_rw), - tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1), tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R3, TCG_REG_R57)); + TCG_REG_P7, TCG_REG_R3, TCG_REG_R56)); } /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, @@ -1480,8 +1564,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57, - mem_index, TCG_REG_R0), + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, + TCG_REG_R56, 0, TCG_AREG0), tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, (tcg_target_long) qemu_ld_helpers[s_bits])); @@ -1489,7 +1573,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, TCG_REG_R2, 8), tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), + TCG_REG_R3, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); if (bswap && s_bits == 1) { @@ -1513,23 +1597,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } - /* XXX/FIXME: suboptimal */ - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, - mem_index, TCG_REG_R0), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R57, 0, TCG_REG_R56), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R56, 0, TCG_AREG0)); if (!bswap || s_bits == 0) { tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, + mem_index, TCG_REG_R0), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } else { tcg_out_bundle(s, miB, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58, + mem_index, TCG_REG_R0), tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, TCG_REG_R8, TCG_REG_R8, 0xb), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, @@ -1581,8 +1659,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, - tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57, - 0, data_reg), + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, + TCG_REG_R56, 0, TCG_AREG0), tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]), tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, (tcg_target_long) qemu_st_helpers[opc])); @@ -1590,31 +1668,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, TCG_REG_R2, 8), tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R56), + TCG_REG_R3, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); if (!bswap || opc == 0) { - tcg_out_bundle(s, mII, + tcg_out_bundle(s, mii, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, + 0, data_reg), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } else if (opc == 1) { - tcg_out_bundle(s, mII, + tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, 
TCG_REG_R2), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 15, 15), + TCG_REG_R2, data_reg, 15, 15)); + tcg_out_bundle(s, miI, + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, + 0, data_reg), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, TCG_REG_R2, TCG_REG_R2, 0xb)); data_reg = TCG_REG_R2; } else if (opc == 2) { - tcg_out_bundle(s, mII, + tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 31, 31), + TCG_REG_R2, data_reg, 31, 31)); + tcg_out_bundle(s, miI, + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, + 0, data_reg), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, TCG_REG_R2, TCG_REG_R2, 0xb)); data_reg = TCG_REG_R2; @@ -1622,25 +1711,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_out_bundle(s, miI, tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), + tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58, + 0, data_reg), tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3, TCG_REG_R2, data_reg, 0xb)); data_reg = TCG_REG_R2; } - /* XXX/FIXME: suboptimal */ - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59, - mem_index, TCG_REG_R0), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R58, 0, TCG_REG_R57), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R57, 0, TCG_REG_R56)); tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc], data_reg, TCG_REG_R3), - tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, - TCG_REG_R56, 0, TCG_AREG0), + tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59, + mem_index, TCG_REG_R0), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); } @@ -2092,6 +2174,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_bswap64(s, args[0], args[1]); break; + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], + args[3], args[4]); + break; + case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], const_args[0], args[1], const_args[1], args[3], 1); @@ -2106,6 +2194,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_setcond_i64: tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 1); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 0); + break; case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, 0); @@ -2196,6 +2292,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_brcond_i32, { "rI", "rI" } }, { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, { INDEX_op_mov_i64, { "r", "r" } }, { INDEX_op_movi_i64, { "r" } }, @@ -2245,6 +2342,10 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_brcond_i64, { "rI", "rI" } }, { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, + { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, { INDEX_op_qemu_ld8u, { "r", "r" } }, { INDEX_op_qemu_ld8s, { "r", "r" } }, @@ -2269,9 +2370,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) 
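For reference, the deposit operation the ia64 backend now implements inserts the low len bits of the second operand into the first at bit position pos. A sketch of its semantics (deposit64_ref is a hypothetical helper mirroring the constant-folding arithmetic in tcg/optimize.c below; assumes 0 < len < 64):

    static uint64_t deposit64_ref(uint64_t arg1, uint64_t arg2,
                                  int pos, int len)
    {
        uint64_t mask = (1ull << len) - 1;
        return (arg1 & ~(mask << pos)) | ((arg2 & mask) << pos);
    }

The TCG_TARGET_deposit_*_valid limit of len <= 16 presumably reflects the short length field of the ia64 dep instruction forms used here.
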
int frame_size; /* reserve some stack space */ - frame_size = TCG_STATIC_CALL_ARGS_SIZE; + frame_size = TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); /* First emit adhoc function descriptor */ *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */ @@ -2378,6 +2482,4 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); tcg_add_target_add_op_defs(ia64_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), - CPU_TEMP_BUF_NLONGS * sizeof(long)); } diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index b7e01b25ae..91fe7a3b06 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -131,10 +131,13 @@ typedef enum { #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_deposit_i32 0 -#define TCG_TARGET_HAS_deposit_i64 0 -#define TCG_TARGET_HAS_movcond_i32 0 -#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_deposit_i64 1 + +#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) +#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */ diff --git a/tcg/optimize.c b/tcg/optimize.c index edb2b0ea90..a06c8eb43e 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) return res; } +static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) +{ + switch (c) { + case TCG_COND_EQ: + return x == y; + case TCG_COND_NE: + return x != y; + case TCG_COND_LT: + return (int32_t)x < (int32_t)y; + case TCG_COND_GE: + return (int32_t)x >= (int32_t)y; + case TCG_COND_LE: + return (int32_t)x <= (int32_t)y; + case TCG_COND_GT: + return (int32_t)x > (int32_t)y; + case TCG_COND_LTU: + return x < y; + case TCG_COND_GEU: + return x >= y; + case TCG_COND_LEU: + return x <= y; + case TCG_COND_GTU: + return x > y; + default: + tcg_abort(); + } +} + +static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) +{ + switch (c) { + case TCG_COND_EQ: + return x == y; + case TCG_COND_NE: + return x != y; + case TCG_COND_LT: + return (int64_t)x < (int64_t)y; + case TCG_COND_GE: + return (int64_t)x >= (int64_t)y; + case TCG_COND_LE: + return (int64_t)x <= (int64_t)y; + case TCG_COND_GT: + return (int64_t)x > (int64_t)y; + case TCG_COND_LTU: + return x < y; + case TCG_COND_GEU: + return x >= y; + case TCG_COND_LEU: + return x <= y; + case TCG_COND_GTU: + return x > y; + default: + tcg_abort(); + } +} + +static bool do_constant_folding_cond_eq(TCGCond c) +{ + switch (c) { + case TCG_COND_GT: + case TCG_COND_LTU: + case TCG_COND_LT: + case TCG_COND_GTU: + case TCG_COND_NE: + return 0; + case TCG_COND_GE: + case TCG_COND_GEU: + case TCG_COND_LE: + case TCG_COND_LEU: + case TCG_COND_EQ: + return 1; + default: + tcg_abort(); + } +} + /* Return 2 if the condition can't be simplified, and the result of the condition (0 or 1) if it can */ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, @@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, if (temps[x].state == 
TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { switch (op_bits(op)) { case 32: - switch (c) { - case TCG_COND_EQ: - return (uint32_t)temps[x].val == (uint32_t)temps[y].val; - case TCG_COND_NE: - return (uint32_t)temps[x].val != (uint32_t)temps[y].val; - case TCG_COND_LT: - return (int32_t)temps[x].val < (int32_t)temps[y].val; - case TCG_COND_GE: - return (int32_t)temps[x].val >= (int32_t)temps[y].val; - case TCG_COND_LE: - return (int32_t)temps[x].val <= (int32_t)temps[y].val; - case TCG_COND_GT: - return (int32_t)temps[x].val > (int32_t)temps[y].val; - case TCG_COND_LTU: - return (uint32_t)temps[x].val < (uint32_t)temps[y].val; - case TCG_COND_GEU: - return (uint32_t)temps[x].val >= (uint32_t)temps[y].val; - case TCG_COND_LEU: - return (uint32_t)temps[x].val <= (uint32_t)temps[y].val; - case TCG_COND_GTU: - return (uint32_t)temps[x].val > (uint32_t)temps[y].val; - default: - break; - } - break; + return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); case 64: - switch (c) { - case TCG_COND_EQ: - return (uint64_t)temps[x].val == (uint64_t)temps[y].val; - case TCG_COND_NE: - return (uint64_t)temps[x].val != (uint64_t)temps[y].val; - case TCG_COND_LT: - return (int64_t)temps[x].val < (int64_t)temps[y].val; - case TCG_COND_GE: - return (int64_t)temps[x].val >= (int64_t)temps[y].val; - case TCG_COND_LE: - return (int64_t)temps[x].val <= (int64_t)temps[y].val; - case TCG_COND_GT: - return (int64_t)temps[x].val > (int64_t)temps[y].val; - case TCG_COND_LTU: - return (uint64_t)temps[x].val < (uint64_t)temps[y].val; - case TCG_COND_GEU: - return (uint64_t)temps[x].val >= (uint64_t)temps[y].val; - case TCG_COND_LEU: - return (uint64_t)temps[x].val <= (uint64_t)temps[y].val; - case TCG_COND_GTU: - return (uint64_t)temps[x].val > (uint64_t)temps[y].val; - default: - break; - } - break; - } - } else if (temps_are_copies(x, y)) { - switch (c) { - case TCG_COND_GT: - case TCG_COND_LTU: - case TCG_COND_LT: - case TCG_COND_GTU: - case TCG_COND_NE: - return 0; - case TCG_COND_GE: - case TCG_COND_GEU: - case TCG_COND_LE: - case TCG_COND_LEU: - case TCG_COND_EQ: - return 1; + return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); default: - break; + tcg_abort(); } + } else if (temps_are_copies(x, y)) { + return do_constant_folding_cond_eq(c); } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { switch (c) { case TCG_COND_LTU: @@ -381,11 +396,73 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, } else { return 2; } +} - fprintf(stderr, - "Unrecognized bitness %d or condition %d in " - "do_constant_folding_cond.\n", op_bits(op), c); - tcg_abort(); +/* Return 2 if the condition can't be simplified, and the result + of the condition (0 or 1) if it can */ +static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) +{ + TCGArg al = p1[0], ah = p1[1]; + TCGArg bl = p2[0], bh = p2[1]; + + if (temps[bl].state == TCG_TEMP_CONST + && temps[bh].state == TCG_TEMP_CONST) { + uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; + + if (temps[al].state == TCG_TEMP_CONST + && temps[ah].state == TCG_TEMP_CONST) { + uint64_t a; + a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; + return do_constant_folding_cond_64(a, b, c); + } + if (b == 0) { + switch (c) { + case TCG_COND_LTU: + return 0; + case TCG_COND_GEU: + return 1; + default: + break; + } + } + } + if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { + return do_constant_folding_cond_eq(c); + } + return 2; +} + +static bool swap_commutative(TCGArg dest, 
TCGArg *p1, TCGArg *p2) +{ + TCGArg a1 = *p1, a2 = *p2; + int sum = 0; + sum += temps[a1].state == TCG_TEMP_CONST; + sum -= temps[a2].state == TCG_TEMP_CONST; + + /* Prefer the constant in second argument, and then the form + op a, a, b, which is better handled on non-RISC hosts. */ + if (sum > 0 || (sum == 0 && dest == a2)) { + *p1 = a2; + *p2 = a1; + return true; + } + return false; +} + +static bool swap_commutative2(TCGArg *p1, TCGArg *p2) +{ + int sum = 0; + sum += temps[p1[0]].state == TCG_TEMP_CONST; + sum += temps[p1[1]].state == TCG_TEMP_CONST; + sum -= temps[p2[0]].state == TCG_TEMP_CONST; + sum -= temps[p2[1]].state == TCG_TEMP_CONST; + if (sum > 0) { + TCGArg t; + t = p1[0], p1[0] = p2[0], p2[0] = t; + t = p1[1], p1[1] = p2[1], p2[1] = t; + return true; + } + return false; } /* Propagate constants and copies, fold constant expressions. */ @@ -397,7 +474,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, const TCGOpDef *def; TCGArg *gen_args; TCGArg tmp; - TCGCond cond; /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. @@ -440,52 +516,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): - /* Prefer the constant in second argument, and then the form - op a, a, b, which is better handled on non-RISC hosts. */ - if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2] - && temps[args[2]].state != TCG_TEMP_CONST)) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; - } + swap_commutative(args[0], &args[1], &args[2]); break; CASE_OP_32_64(brcond): - if (temps[args[0]].state == TCG_TEMP_CONST - && temps[args[1]].state != TCG_TEMP_CONST) { - tmp = args[0]; - args[0] = args[1]; - args[1] = tmp; + if (swap_commutative(-1, &args[0], &args[1])) { args[2] = tcg_swap_cond(args[2]); } break; CASE_OP_32_64(setcond): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state != TCG_TEMP_CONST) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; + if (swap_commutative(args[0], &args[1], &args[2])) { args[3] = tcg_swap_cond(args[3]); } break; CASE_OP_32_64(movcond): - cond = args[5]; - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state != TCG_TEMP_CONST) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; - cond = tcg_swap_cond(cond); + if (swap_commutative(-1, &args[1], &args[2])) { + args[5] = tcg_swap_cond(args[5]); } /* For movcond, we canonicalize the "false" input reg to match the destination reg so that the tcg backend can implement a "move if true" operation. 
*/ - if (args[0] == args[3]) { - tmp = args[3]; - args[3] = args[4]; - args[4] = tmp; - cond = tcg_invert_cond(cond); + if (swap_commutative(args[0], &args[4], &args[3])) { + args[5] = tcg_invert_cond(args[5]); } - args[5] = cond; + break; + case INDEX_op_add2_i32: + swap_commutative(args[0], &args[2], &args[4]); + swap_commutative(args[1], &args[3], &args[5]); + break; + case INDEX_op_mulu2_i32: + swap_commutative(args[0], &args[2], &args[3]); + break; + case INDEX_op_brcond2_i32: + if (swap_commutative2(&args[0], &args[2])) { + args[4] = tcg_swap_cond(args[4]); + } + break; + case INDEX_op_setcond2_i32: + if (swap_commutative2(&args[1], &args[3])) { + args[5] = tcg_swap_cond(args[5]); + } + break; default: break; } @@ -622,6 +692,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_args += 2; args += 2; break; + CASE_OP_32_64(not): CASE_OP_32_64(neg): CASE_OP_32_64(ext8s): @@ -634,14 +705,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); tcg_opt_gen_movi(gen_args, args[0], tmp); - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; + gen_args += 2; + args += 2; + break; } - gen_args += 2; - args += 2; - break; + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): @@ -665,15 +734,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, temps[args[2]].val); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args += 3; + args += 3; + break; } - args += 3; - break; + goto do_default; + CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { @@ -683,33 +748,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, | ((temps[args[2]].val & tmp) << args[3]); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args[4] = args[4]; - gen_args += 5; + args += 5; + break; } - args += 5; - break; + goto do_default; + CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); if (tmp != 2) { gen_opc_buf[op_index] = op_to_movi(op); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args += 4; + args += 4; + break; } - args += 4; - break; + goto do_default; + CASE_OP_32_64(brcond): tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); if (tmp != 2) { @@ -721,17 +775,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } else { gen_opc_buf[op_index] = INDEX_op_nop; } - } else { - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args += 4; + args += 4; + break; } - args += 4; - break; + goto do_default; + CASE_OP_32_64(movcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); if (tmp != 2) { @@ -746,18 +794,125 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); gen_args += 2; } + args += 6; + break; + } + 
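The movcond canonicalization above can be condensed into a sketch: besides the usual preference for constants in the second comparison operand (handled by swap_commutative), the "false" input is made to alias the destination by swapping v1/v2 and inverting the condition, so a backend constraint such as the ARM entry's "0" can tie v2 to the output and emit a single predicated move:

    /* args[] = { ret, c1, c2, v1, v2, cond }; tcg_invert_cond is the
       existing TCG helper.  This is one of the cases swap_commutative
       now covers: */
    if (args[0] == args[3]) {               /* dest aliases v1 ("true") */
        TCGArg t = args[3];
        args[3] = args[4];
        args[4] = t;                        /* dest now aliases v2 */
        args[5] = tcg_invert_cond(args[5]);
    }
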
goto do_default; + + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + if (temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[4]].state == TCG_TEMP_CONST + && temps[args[5]].state == TCG_TEMP_CONST) { + uint32_t al = temps[args[2]].val; + uint32_t ah = temps[args[3]].val; + uint32_t bl = temps[args[4]].val; + uint32_t bh = temps[args[5]].val; + uint64_t a = ((uint64_t)ah << 32) | al; + uint64_t b = ((uint64_t)bh << 32) | bl; + TCGArg rl, rh; + + if (op == INDEX_op_add2_i32) { + a += b; + } else { + a -= b; + } + + /* We emit the extra nop when we emit the add2/sub2. */ + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + + rl = args[0]; + rh = args[1]; + gen_opc_buf[op_index] = INDEX_op_movi_i32; + gen_opc_buf[++op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); + tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); + gen_args += 4; + args += 6; + break; + } + goto do_default; + + case INDEX_op_mulu2_i32: + if (temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST) { + uint32_t a = temps[args[2]].val; + uint32_t b = temps[args[3]].val; + uint64_t r = (uint64_t)a * b; + TCGArg rl, rh; + + /* We emit the extra nop when we emit the mulu2. */ + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + + rl = args[0]; + rh = args[1]; + gen_opc_buf[op_index] = INDEX_op_movi_i32; + gen_opc_buf[++op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); + tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); + gen_args += 4; + args += 4; + break; + } + goto do_default; + + case INDEX_op_brcond2_i32: + tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); + if (tmp != 2) { + if (tmp) { + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + gen_opc_buf[op_index] = INDEX_op_br; + gen_args[0] = args[5]; + gen_args += 1; + } else { + gen_opc_buf[op_index] = INDEX_op_nop; + } + } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) + && temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[2]].val == 0 + && temps[args[3]].val == 0) { + /* Simplify LT/GE comparisons vs zero to a single compare + vs the high word of the input. */ + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + gen_opc_buf[op_index] = INDEX_op_brcond_i32; + gen_args[0] = args[1]; + gen_args[1] = args[3]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; } else { - reset_temp(args[0]); + goto do_default; + } + args += 6; + break; + + case INDEX_op_setcond2_i32: + tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); + if (tmp != 2) { + gen_opc_buf[op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(gen_args, args[0], tmp); + gen_args += 2; + } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[4]].state == TCG_TEMP_CONST + && temps[args[3]].val == 0 + && temps[args[4]].val == 0) { + /* Simplify LT/GE comparisons vs zero to a single compare + vs the high word of the input. 
*/ + gen_opc_buf[op_index] = INDEX_op_setcond_i32; gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args[4] = args[4]; - gen_args[5] = args[5]; - gen_args += 6; + gen_args[1] = args[2]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; + } else { + goto do_default; } args += 6; break; + case INDEX_op_call: nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { @@ -776,11 +931,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, i--; } break; + default: - /* Default case: we do know nothing about operation so no - propagation is done. We trash everything if the operation - is the end of a basic block, otherwise we only trash the - output args. */ + do_default: + /* Default case: we know nothing about operation (or were unable + to compute the operation result) so no propagation is done. + We trash everything if the operation is the end of a basic + block, otherwise we only trash the output args. */ if (def->flags & TCG_OPF_BB_END) { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); } else { diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 0c32baa50e..f146647874 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -115,96 +115,6 @@ static const int tcg_target_call_oarg_regs[] = { TCG_REG_O3, }; -static inline int check_fit_tl(tcg_target_long val, unsigned int bits) -{ - return (val << ((sizeof(tcg_target_long) * 8 - bits)) - >> (sizeof(tcg_target_long) * 8 - bits)) == val; -} - -static inline int check_fit_i32(uint32_t val, unsigned int bits) -{ - return ((val << (32 - bits)) >> (32 - bits)) == val; -} - -static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) -{ - value += addend; - switch (type) { - case R_SPARC_32: - if (value != (uint32_t)value) - tcg_abort(); - *(uint32_t *)code_ptr = value; - break; - case R_SPARC_WDISP22: - value -= (long)code_ptr; - value >>= 2; - if (!check_fit_tl(value, 22)) - tcg_abort(); - *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value; - break; - case R_SPARC_WDISP19: - value -= (long)code_ptr; - value >>= 2; - if (!check_fit_tl(value, 19)) - tcg_abort(); - *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value; - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'L': /* qemu_ld/st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - // Helper args - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); - break; - case 'I': - ct->ct |= TCG_CT_CONST_S11; - break; - case 'J': - ct->ct |= TCG_CT_CONST_S13; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) -{ - int ct; - - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) - return 1; - else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) - return 1; - else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) - return 1; - else - return 0; -} - #define 
INSN_OP(x) ((x) << 30) #define INSN_OP2(x) ((x) << 22) #define INSN_OP3(x) ((x) << 19) @@ -214,12 +124,13 @@ static inline int tcg_target_const_match(tcg_target_long val, #define INSN_RS2(x) (x) #define INSN_ASI(x) ((x) << 5) +#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) +#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) -#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff) +#define INSN_COND(x) ((x) << 25) -#define INSN_COND(x, a) (((x) << 25) | ((a) << 29)) #define COND_N 0x0 #define COND_E 0x1 #define COND_LE 0x2 @@ -236,11 +147,26 @@ static inline int tcg_target_const_match(tcg_target_long val, #define COND_CC 0xd #define COND_POS 0xe #define COND_VC 0xf -#define BA (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2)) +#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) + +#define RCOND_Z 1 +#define RCOND_LEZ 2 +#define RCOND_LZ 3 +#define RCOND_NZ 5 +#define RCOND_GZ 6 +#define RCOND_GEZ 7 #define MOVCC_ICC (1 << 18) #define MOVCC_XCC (1 << 18 | 1 << 12) +#define BPCC_ICC 0 +#define BPCC_XCC (2 << 20) +#define BPCC_PT (1 << 19) +#define BPCC_PN 0 +#define BPCC_A (1 << 29) + +#define BPR_PT BPCC_PT + #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) @@ -260,6 +186,7 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) #define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) #define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) @@ -313,6 +240,109 @@ static inline int tcg_target_const_match(tcg_target_long val, #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) +static inline int check_fit_tl(tcg_target_long val, unsigned int bits) +{ + return (val << ((sizeof(tcg_target_long) * 8 - bits)) + >> (sizeof(tcg_target_long) * 8 - bits)) == val; +} + +static inline int check_fit_i32(uint32_t val, unsigned int bits) +{ + return ((val << (32 - bits)) >> (32 - bits)) == val; +} + +static void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend) +{ + uint32_t insn; + value += addend; + switch (type) { + case R_SPARC_32: + if (value != (uint32_t)value) { + tcg_abort(); + } + *(uint32_t *)code_ptr = value; + break; + case R_SPARC_WDISP16: + value -= (long)code_ptr; + if (!check_fit_tl(value >> 2, 16)) { + tcg_abort(); + } + insn = *(uint32_t *)code_ptr; + insn &= ~INSN_OFF16(-1); + insn |= INSN_OFF16(value); + *(uint32_t *)code_ptr = insn; + break; + case R_SPARC_WDISP19: + value -= (long)code_ptr; + if (!check_fit_tl(value >> 2, 19)) { + tcg_abort(); + } + insn = *(uint32_t *)code_ptr; + insn &= ~INSN_OFF19(-1); + insn |= INSN_OFF19(value); + *(uint32_t *)code_ptr = insn; + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + break; + case 'L': /* qemu_ld/st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + // Helper args + 
tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); + break; + case 'I': + ct->ct |= TCG_CT_CONST_S11; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S13; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { + return 1; + } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { + return 1; + } else { + return 0; + } +} + static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, int op) { @@ -337,7 +367,9 @@ static void tcg_out_arithc(TCGContext *s, int rd, int rs1, static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + if (ret != arg) { + tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + } } static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg) @@ -479,39 +511,6 @@ static inline void tcg_out_nop(TCGContext *s) tcg_out_sethi(s, TCG_REG_G0, 0); } -static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index) -{ - TCGLabel *l = &s->labels[label_index]; - uint32_t off22; - - if (l->has_value) { - off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr); - } else { - /* Make sure to preserve destinations during retranslation. */ - off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0); - } - tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index) -{ - TCGLabel *l = &s->labels[label_index]; - uint32_t off19; - - if (l->has_value) { - off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr); - } else { - /* Make sure to preserve destinations during retranslation. */ - off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0); - } - tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | - (0x5 << 19) | off19)); -} -#endif - static const uint8_t tcg_cond_to_bcond[] = { [TCG_COND_EQ] = COND_E, [TCG_COND_NE] = COND_NE, @@ -525,70 +524,144 @@ static const uint8_t tcg_cond_to_bcond[] = { [TCG_COND_GTU] = COND_GU, }; +static const uint8_t tcg_cond_to_rcond[] = { + [TCG_COND_EQ] = RCOND_Z, + [TCG_COND_NE] = RCOND_NZ, + [TCG_COND_LT] = RCOND_LZ, + [TCG_COND_GT] = RCOND_GZ, + [TCG_COND_LE] = RCOND_LEZ, + [TCG_COND_GE] = RCOND_GEZ +}; + +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) +{ + tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); +} + +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label) +{ + TCGLabel *l = &s->labels[label]; + int off19; + + if (l->has_value) { + off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr); + } else { + /* Make sure to preserve destinations during retranslation. 
*/ + off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0); + } + tcg_out_bpcc0(s, scond, flags, off19); +} + static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const) { tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); } -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index) +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, int const_arg2, int label) { tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label); tcg_out_nop(s); } +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret, + TCGArg v1, int v1const) +{ + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) + | INSN_RS1(tcg_cond_to_bcond[cond]) + | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const, + TCGArg v1, int v1const) +{ + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); +} + #if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index) +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, int const_arg2, int label) { - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index); + /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ + if (arg2 == 0 && !is_unsigned_cond(cond)) { + TCGLabel *l = &s->labels[label]; + int off16; + + if (l->has_value) { + off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr); + } else { + /* Make sure to preserve destinations during retranslation. */ + off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0); + } + tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) + | INSN_COND(tcg_cond_to_rcond[cond]) | off16); + } else { + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label); + } tcg_out_nop(s); } + +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, + TCGArg v1, int v1const) +{ + tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) + | (tcg_cond_to_rcond[cond] << 10) + | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const, + TCGArg v1, int v1const) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. + Note that the immediate range is one bit smaller, so we must check + for that as well. */ + if (c2 == 0 && !is_unsigned_cond(cond) + && (!v1const || check_fit_tl(v1, 10))) { + tcg_out_movr(s, cond, ret, c1, v1, v1const); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + } +} #else static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, TCGArg al, TCGArg ah, TCGArg bl, int blconst, TCGArg bh, int bhconst, int label_dest) { - int cc, label_next = gen_new_label(); + int scond, label_next = gen_new_label(); tcg_out_cmp(s, ah, bh, bhconst); /* Note that we fill one of the delay slots with the second compare. 
*/ switch (cond) { case TCG_COND_EQ: - cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0); - tcg_out_branch_i32(s, cc, label_next); + tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); tcg_out_cmp(s, al, bl, blconst); - cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_EQ], 0); - tcg_out_branch_i32(s, cc, label_dest); + tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest); break; case TCG_COND_NE: - cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0); - tcg_out_branch_i32(s, cc, label_dest); + tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); tcg_out_cmp(s, al, bl, blconst); - tcg_out_branch_i32(s, cc, label_dest); + tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); break; default: - /* ??? One could fairly easily special-case 64-bit unsigned - compares against 32-bit zero-extended constants. For instance, - we know that (unsigned)AH < 0 is false and need not emit it. - Similarly, (unsigned)AH > 0 being true implies AH != 0, so the - second branch will never be taken. */ - cc = INSN_COND(tcg_cond_to_bcond[cond], 0); - tcg_out_branch_i32(s, cc, label_dest); + scond = tcg_cond_to_bcond[tcg_high_cond(cond)]; + tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); tcg_out_nop(s); - cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0); - tcg_out_branch_i32(s, cc, label_next); + tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); tcg_out_cmp(s, al, bl, blconst); - cc = INSN_COND(tcg_cond_to_bcond[tcg_unsigned_cond(cond)], 0); - tcg_out_branch_i32(s, cc, label_dest); + scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)]; + tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); break; } tcg_out_nop(s); @@ -600,39 +673,42 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) { - TCGArg t; - /* For 32-bit comparisons, we can play games with ADDX/SUBX. */ switch (cond) { + case TCG_COND_LTU: + case TCG_COND_GEU: + /* The result of the comparison is in the carry bit. */ + break; + case TCG_COND_EQ: case TCG_COND_NE: + /* For equality, we can transform to inequality vs zero. */ if (c2 != 0) { tcg_out_arithc(s, ret, c1, c2, c2const, ARITH_XOR); } c1 = TCG_REG_G0, c2 = ret, c2const = 0; - cond = (cond == TCG_COND_EQ ? TCG_COND_LEU : TCG_COND_LTU); + cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); break; case TCG_COND_GTU: - case TCG_COND_GEU: - if (c2const && c2 != 0) { - tcg_out_movi_imm13(s, TCG_REG_T1, c2); - c2 = TCG_REG_T1; - } - t = c1, c1 = c2, c2 = t, c2const = 0; - cond = tcg_swap_cond(cond); - break; - - case TCG_COND_LTU: case TCG_COND_LEU: - break; + /* If we don't need to load a constant into a register, we can + swap the operands on GTU/LEU. There's no benefit to loading + the constant into a temporary register. 
*/ + if (!c2const || c2 == 0) { + TCGArg t = c1; + c1 = c2; + c2 = t; + c2const = 0; + cond = tcg_swap_cond(cond); + break; + } + /* FALLTHRU */ default: tcg_out_cmp(s, c1, c2, c2const); tcg_out_movi_imm13(s, ret, 0); - tcg_out32(s, ARITH_MOVCC | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | MOVCC_ICC | INSN_IMM11(1)); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); return; } @@ -648,11 +724,16 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | MOVCC_XCC | INSN_IMM11(1)); + /* For 64-bit signed comparisons vs zero, we can avoid the compare + if the input does not overlap the output. */ + if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movr(s, cond, ret, c1, 1, 1); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); + } } #else static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, @@ -660,35 +741,56 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg bl, int blconst, TCGArg bh, int bhconst) { - int lab; + int tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (ret != ah && (bhconst || ret != bh)) { + tmp = ret; + } switch (cond) { case TCG_COND_EQ: - tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst); - tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst); - tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND); - break; - case TCG_COND_NE: - tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst); - tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst); - tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR); + if (bl == 0 && bh == 0) { + if (cond == TCG_COND_EQ) { + tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC); + tcg_out_movi(s, TCG_TYPE_I32, ret, 1); + } else { + tcg_out_arith(s, ret, al, ah, ARITH_ORCC); + } + } else { + tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst); + tcg_out_cmp(s, ah, bh, bhconst); + tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); + } + tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1); break; default: - lab = gen_new_label(); - + /* <= : ah < bh | (ah == bh && al <= bl) */ + tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst); tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), lab); - tcg_out_movi_imm13(s, ret, 1); - tcg_out_branch_i32(s, INSN_COND(COND_NE, 1), lab); - tcg_out_movi_imm13(s, ret, 0); + tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); + tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1); + tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1); + break; + } +} - tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), ret, al, bl, blconst); +static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, + TCGArg al, TCGArg ah, TCGArg bl, int blconst, + TCGArg bh, int bhconst, int opl, int oph) +{ + TCGArg tmp = TCG_REG_T1; - tcg_out_label(s, lab, s->code_ptr); - break; + /* Note that the low parts are fully consumed before tmp is set. 
*/ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; } + + tcg_out_arithc(s, tmp, al, bl, blconst, opl); + tcg_out_arithc(s, rh, ah, bh, bhconst, oph); + tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); } #endif @@ -859,8 +961,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) /* bne,pn %[xi]cc, label0 */ label_ptr[0] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1) - | ((TARGET_LONG_BITS == 64) << 21))); + tcg_out_bpcc0(s, COND_NE, BPCC_PN + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* TLB Hit. */ /* Load all 64-bits into an O/G register. */ @@ -875,8 +977,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) /* b,a,pt label1 */ label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1) - | (1 << 29) | (1 << 19))); + tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0); } else { /* The fast path is exactly one insn. Thus we can perform the entire TLB Hit in the (annulled) delay slot of the branch @@ -885,9 +986,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop) /* beq,a,pt %[xi]cc, label0 */ label_ptr[0] = NULL; label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) - | ((TARGET_LONG_BITS == 64) << 21) - | (1 << 29) | (1 << 19))); + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]); } @@ -976,7 +1076,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) { int addrlo_idx = 1, datalo, datahi, addr_reg; #if defined(CONFIG_SOFTMMU) - int memi_idx, memi, n; + int memi_idx, memi, n, datafull; uint32_t *label_ptr; #endif @@ -993,23 +1093,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop) addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args, offsetof(CPUTLBEntry, addr_write)); + datafull = datalo; if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) { /* Reconstruct the full 64-bit value. */ tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datalo = TCG_REG_O2; + datafull = TCG_REG_O2; } /* The fast path is exactly one insn. Thus we can perform the entire TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ /* beq,a,pt %[xi]cc, label0 */ label_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) - | ((TARGET_LONG_BITS == 64) << 21) - | (1 << 29) | (1 << 19))); + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); + tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]); /* TLB Miss. 
*/ @@ -1098,7 +1198,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_nop(s); break; case INDEX_op_br: - tcg_out_branch_i32(s, COND_A, args[0]); + tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]); tcg_out_nop(s); break; case INDEX_op_movi_i32: @@ -1211,6 +1311,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_setcond_i32(s, args[3], args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond_i32(s, args[5], args[0], args[1], + args[2], const_args[2], args[3], const_args[3]); + break; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_brcond2_i32: @@ -1224,16 +1328,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[4], const_args[4]); break; case INDEX_op_add2_i32: - tcg_out_arithc(s, args[0], args[2], args[4], const_args[4], - ARITH_ADDCC); - tcg_out_arithc(s, args[1], args[3], args[5], const_args[5], - ARITH_ADDX); + tcg_out_addsub2(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_ADDCC, ARITH_ADDX); break; case INDEX_op_sub2_i32: - tcg_out_arithc(s, args[0], args[2], args[4], const_args[4], - ARITH_SUBCC); - tcg_out_arithc(s, args[1], args[3], args[5], const_args[5], - ARITH_SUBX); + tcg_out_addsub2(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_SUBCC, ARITH_SUBX); break; case INDEX_op_mulu2_i32: tcg_out_arithc(s, args[0], args[2], args[3], const_args[3], @@ -1346,7 +1448,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_setcond_i64(s, args[3], args[0], args[1], args[2], const_args[2]); break; - + case INDEX_op_movcond_i64: + tcg_out_movcond_i64(s, args[5], args[0], args[1], + args[2], const_args[2], args[3], const_args[3]); + break; #endif gen_arith: tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c); @@ -1375,39 +1480,40 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_ld16u_i32, { "r", "r" } }, { INDEX_op_ld16s_i32, { "r", "r" } }, { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "rJ" } }, - { INDEX_op_mul_i32, { "r", "r", "rJ" } }, - { INDEX_op_div_i32, { "r", "r", "rJ" } }, - { INDEX_op_divu_i32, { "r", "r", "rJ" } }, - { INDEX_op_rem_i32, { "r", "r", "rJ" } }, - { INDEX_op_remu_i32, { "r", "r", "rJ" } }, - { INDEX_op_sub_i32, { "r", "r", "rJ" } }, - { INDEX_op_and_i32, { "r", "r", "rJ" } }, - { INDEX_op_andc_i32, { "r", "r", "rJ" } }, - { INDEX_op_or_i32, { "r", "r", "rJ" } }, - { INDEX_op_orc_i32, { "r", "r", "rJ" } }, - { INDEX_op_xor_i32, { "r", "r", "rJ" } }, - - { INDEX_op_shl_i32, { "r", "r", "rJ" } }, - { INDEX_op_shr_i32, { "r", "r", "rJ" } }, - { INDEX_op_sar_i32, { "r", "r", "rJ" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_rem_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_remu_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rJ" 
@@ -1375,39 +1480,40 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ld16u_i32, { "r", "r" } },
     { INDEX_op_ld16s_i32, { "r", "r" } },
     { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "r", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
-
-    { INDEX_op_add_i32, { "r", "r", "rJ" } },
-    { INDEX_op_mul_i32, { "r", "r", "rJ" } },
-    { INDEX_op_div_i32, { "r", "r", "rJ" } },
-    { INDEX_op_divu_i32, { "r", "r", "rJ" } },
-    { INDEX_op_rem_i32, { "r", "r", "rJ" } },
-    { INDEX_op_remu_i32, { "r", "r", "rJ" } },
-    { INDEX_op_sub_i32, { "r", "r", "rJ" } },
-    { INDEX_op_and_i32, { "r", "r", "rJ" } },
-    { INDEX_op_andc_i32, { "r", "r", "rJ" } },
-    { INDEX_op_or_i32, { "r", "r", "rJ" } },
-    { INDEX_op_orc_i32, { "r", "r", "rJ" } },
-    { INDEX_op_xor_i32, { "r", "r", "rJ" } },
-
-    { INDEX_op_shl_i32, { "r", "r", "rJ" } },
-    { INDEX_op_shr_i32, { "r", "r", "rJ" } },
-    { INDEX_op_sar_i32, { "r", "r", "rJ" } },
+    { INDEX_op_st8_i32, { "rZ", "r" } },
+    { INDEX_op_st16_i32, { "rZ", "r" } },
+    { INDEX_op_st_i32, { "rZ", "r" } },
+
+    { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_rem_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_remu_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
+
+    { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
 
     { INDEX_op_neg_i32, { "r", "rJ" } },
     { INDEX_op_not_i32, { "r", "rJ" } },
 
-    { INDEX_op_brcond_i32, { "r", "rJ" } },
-    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+    { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+    { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
 
 #if TCG_TARGET_REG_BITS == 32
-    { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
-    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_sub2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
-    { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
+    { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
@@ -1420,27 +1526,27 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ld32u_i64, { "r", "r" } },
     { INDEX_op_ld32s_i64, { "r", "r" } },
     { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
-
-    { INDEX_op_add_i64, { "r", "r", "rJ" } },
-    { INDEX_op_mul_i64, { "r", "r", "rJ" } },
-    { INDEX_op_div_i64, { "r", "r", "rJ" } },
-    { INDEX_op_divu_i64, { "r", "r", "rJ" } },
-    { INDEX_op_rem_i64, { "r", "r", "rJ" } },
-    { INDEX_op_remu_i64, { "r", "r", "rJ" } },
-    { INDEX_op_sub_i64, { "r", "r", "rJ" } },
-    { INDEX_op_and_i64, { "r", "r", "rJ" } },
-    { INDEX_op_andc_i64, { "r", "r", "rJ" } },
-    { INDEX_op_or_i64, { "r", "r", "rJ" } },
-    { INDEX_op_orc_i64, { "r", "r", "rJ" } },
-    { INDEX_op_xor_i64, { "r", "r", "rJ" } },
-
-    { INDEX_op_shl_i64, { "r", "r", "rJ" } },
-    { INDEX_op_shr_i64, { "r", "r", "rJ" } },
-    { INDEX_op_sar_i64, { "r", "r", "rJ" } },
+    { INDEX_op_st8_i64, { "rZ", "r" } },
+    { INDEX_op_st16_i64, { "rZ", "r" } },
+    { INDEX_op_st32_i64, { "rZ", "r" } },
+    { INDEX_op_st_i64, { "rZ", "r" } },
+
+    { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_mul_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_div_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_divu_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_rem_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_remu_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_sub_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_and_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_andc_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_or_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_orc_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_xor_i64, { "r", "rZ", "rJ" } },
+
+    { INDEX_op_shl_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_shr_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_sar_i64, { "r", "rZ", "rJ" } },
 
     { INDEX_op_neg_i64, { "r", "rJ" } },
     { INDEX_op_not_i64, { "r", "rJ" } },
@@ -1448,8 +1554,9 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_ext32s_i64, { "r", "ri" } },
     { INDEX_op_ext32u_i64, { "r", "ri" } },
 
-    { INDEX_op_brcond_i64, { "r", "rJ" } },
-    { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+    { INDEX_op_brcond_i64, { "rZ", "rJ" } },
+    { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } },
+    { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
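Throughout the tables above, inputs that may legally be the constant zero move from "r" to "rZ". The new 'Z' constraint letter, backed by TCG_CT_CONST_ZERO in tcg-target.h just below, tells the register allocator it may pass a literal 0 through to the backend, which then satisfies it with %g0, the SPARC register hardwired to zero, instead of loading 0 into a scratch register. A toy model of the two halves of that mechanism follows; the constant and register numbers are stand-ins, not QEMU's:

#include <stdio.h>

#define CT_CONST_ZERO 0x400      /* mirrors TCG_CT_CONST_ZERO below  */
#define REG_G0        0          /* %g0 always reads as zero         */

/* Constraint check: a constant matches 'Z' only if it is exactly 0. */
static int const_matches(int ct, long val)
{
    return (ct & CT_CONST_ZERO) && val == 0;
}

/* Operand selection: a matched zero constant needs no scratch register. */
static int operand_reg(long val, int is_const, int reg)
{
    return is_const ? (const_matches(CT_CONST_ZERO, val) ? REG_G0 : -1)
                    : reg;
}

int main(void)
{
    printf("%d %d\n", operand_reg(0, 1, -1),   /* -> 0 (%g0)       */
                      operand_reg(5, 0, 3));   /* -> 3 (register)  */
    return 0;
}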
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4a17f1e118..0e7d398a41 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -62,8 +62,9 @@ typedef enum {
     TCG_REG_I7,
 } TCGReg;
 
-#define TCG_CT_CONST_S11 0x100
-#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_S11  0x100
+#define TCG_CT_CONST_S13  0x200
+#define TCG_CT_CONST_ZERO 0x400
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_O6
@@ -99,7 +100,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -121,7 +122,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 551845801d..8100a5a2e0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1018,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
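The three tcg_gen_op0(INDEX_op_nop) pads emitted above are not dead weight: the opcode and argument streams cannot grow or shrink while liveness analysis walks them, so rewriting a double-word op into its single-word twin (next hunk) only works if a spare opcode slot already follows to take ownership of the leftover arguments. A toy, compilable model of that in-place rewrite, not QEMU code:

#include <assert.h>
#include <stdio.h>

/* Toy model of the rewrite enabled by the reserved nop: both stream
 * lengths stay fixed, and the nop becomes a "nopn" that owns the
 * three argument slots the narrower op no longer uses.
 */
enum { OP_NOP, OP_NOPN, OP_ADD2_I32, OP_ADD_I32 };

int main(void)
{
    int opc[]  = { OP_ADD2_I32, OP_NOP };     /* add2 + reserved nop */
    int args[] = { 0, 1, 2, 3, 4, 5 };        /* rl rh al ah bl bh   */
    int high_is_dead = 1;                     /* rh never read again */

    if (high_is_dead) {
        opc[0] = OP_ADD_I32;                  /* add2 -> add         */
        args[1] = args[2];                    /* now: rl, al, bl     */
        args[2] = args[4];
        assert(opc[1] == OP_NOP);             /* the reserved slot   */
        opc[1] = OP_NOPN;                     /* absorbs args[3..5]  */
    }
    printf("%s rl=%d a=%d b=%d\n",
           opc[0] == OP_ADD_I32 ? "add_i32" : "add2_i32",
           args[0], args[1], args[2]);
    return 0;
}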
diff --git a/tcg/tcg.c b/tcg/tcg.c
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1303,8 +1303,58 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This happens often for x86_64 guest when the
+               cpu mode is set to 32 bit.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                if (op == INDEX_op_add2_i32) {
+                    op = INDEX_op_add_i32;
+                } else {
+                    op = INDEX_op_sub_i32;
+                }
+                gen_opc_buf[op_index] = op;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1318,6 +1368,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 if (!dead_temps[arg])
                     goto do_not_remove;
             }
+        do_remove:
             tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
             s->del_op_count++;
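To make the rewrite above concrete, here is the stream transformation it performs for the case the comment calls out (an x86_64 guest running in 32-bit mode, where the high half of a 64-bit add is often never read); the layout below is illustrative:

/* Hypothetical opcode stream for tcg_gen_add_i64(ret, a, b) on a
 * 32-bit host when ret's high half is dead:
 *
 *   as emitted:      add2_i32  rl, rh, al, ah, bl, bh
 *                    nop                    <- reserved by tcg_gen_add_i64
 *
 *   after liveness:  add_i32   rl, al, bl   <- same opcode slot, 3 args
 *                    nopn                   <- absorbs the 3 leftover args
 *
 * The mulu2_i32 case is the same idea with 4 arguments: mul_i32 keeps
 * 3 of them, which is why its tcg_set_nop call passes 1 leftover
 * argument instead of 3.
 */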