diff options
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/README | 3 | ||||
-rw-r--r-- | tcg/arm/tcg-target.c | 6 | ||||
-rw-r--r-- | tcg/hppa/tcg-target.c | 27 | ||||
-rw-r--r-- | tcg/hppa/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c | 110 | ||||
-rw-r--r-- | tcg/i386/tcg-target.h | 4 | ||||
-rw-r--r-- | tcg/ia64/tcg-target.c | 6 | ||||
-rw-r--r-- | tcg/mips/tcg-target.c | 342 | ||||
-rw-r--r-- | tcg/mips/tcg-target.h | 25 | ||||
-rw-r--r-- | tcg/optimize.c | 361 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c | 81 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/ppc64/tcg-target.c | 6 | ||||
-rw-r--r-- | tcg/s390/tcg-target.c | 5 | ||||
-rw-r--r-- | tcg/sparc/tcg-target.c | 6 | ||||
-rw-r--r-- | tcg/tcg.c | 16 | ||||
-rw-r--r-- | tcg/tci/tcg-target.c | 6 |
17 files changed, 644 insertions, 364 deletions
diff --git a/tcg/README b/tcg/README index d03ae05e34..33783ee17b 100644 --- a/tcg/README +++ b/tcg/README @@ -392,7 +392,8 @@ Exit the current TB and return the value t0 (word type). Exit the current TB and jump to the TB index 'index' (constant) if the current TB was linked to this TB. Otherwise execute the next -instructions. +instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued +at most once with each slot index per TB. * qemu_ld8u t0, t1, flags qemu_ld8s t0, t1, flags diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index aed3b53247..2bad0a2b17 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -145,12 +145,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c index 5385d45cdc..2c79c1081e 100644 --- a/tcg/hppa/tcg-target.c +++ b/tcg/hppa/tcg-target.c @@ -175,12 +175,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, *insn_ptr = insn; } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -912,6 +906,18 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret, tcg_out_mov(s, TCG_TYPE_I32, ret, scratch); } +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const, + TCGArg v1, int v1const) +{ + tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const); + if (v1const) { + tcg_out_movi(s, TCG_TYPE_I32, ret, v1); + } else { + tcg_out_mov(s, TCG_TYPE_I32, ret, v1); + } +} + #if defined(CONFIG_SOFTMMU) #include "../../softmmu_defs.h" @@ -1520,6 +1526,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, args[3], const_args[3], args[4], const_args[4]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2], + args[3], const_args[3]); + break; + case INDEX_op_add2_i32: tcg_out_add2(s, args[0], args[1], args[2], args[3], args[4], args[5], const_args[4]); @@ -1628,6 +1639,10 @@ static const TCGTargetOpDef hppa_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rI" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + /* ??? We can actually support a signed 14-bit arg3, but we + only have existing constraints for a signed 11-bit. */ + { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } }, { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } }, diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 4defd28577..5351353719 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -96,7 +96,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index aa1fa9ff4c..122d63630c 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -75,9 +75,7 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_R8, TCG_REG_R9, #else - TCG_REG_EAX, - TCG_REG_EDX, - TCG_REG_ECX + /* 32 bit mode uses stack based calling convention (GCC default). */ #endif }; @@ -88,6 +86,18 @@ static const int tcg_target_call_oarg_regs[] = { #endif }; +/* Registers used with L constraint, which are the first argument + registers on x86_64, and two random call clobbered registers on + i386. */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_REG_L0 tcg_target_call_iarg_regs[0] +# define TCG_REG_L1 tcg_target_call_iarg_regs[1] +# define TCG_REG_L2 tcg_target_call_iarg_regs[2] +#else +# define TCG_REG_L0 TCG_REG_EAX +# define TCG_REG_L1 TCG_REG_EDX +#endif + static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, @@ -114,16 +124,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - if (TCG_TARGET_REG_BITS == 64) { - return 6; - } - - return 0; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -179,16 +179,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) /* qemu_ld/st address constraint */ case 'L': ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { +#if TCG_TARGET_REG_BITS == 64 tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); - tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); - } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2); +#else tcg_regset_set32(ct->u.regs, 0, 0xff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); - } + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); +#endif break; case 'e': @@ -1029,8 +1029,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, uint8_t **label_ptr, int which) { const int addrlo = args[addrlo_idx]; - const int r0 = tcg_target_call_iarg_regs[0]; - const int r1 = tcg_target_call_iarg_regs[1]; + const int r0 = TCG_REG_L0; + const int r1 = TCG_REG_L1; TCGType type = TCG_TYPE_I32; int rexw = 0; @@ -1192,8 +1192,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1226,14 +1225,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], mem_index); /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); #endif tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); @@ -1299,11 +1294,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, use the ADDR32 prefix. For now, do nothing. */ if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, - tcg_target_call_iarg_regs[0], GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, - tcg_target_call_iarg_regs[0], base); - base = tcg_target_call_iarg_regs[0]; + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); + base = TCG_REG_L0; offset = 0; } } @@ -1324,8 +1317,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value instead of moving to the scratch. But as it is, the L constraint - means that the second argument reg is definitely free here. */ - int scratch = tcg_target_call_iarg_regs[1]; + means that TCG_REG_L1 is definitely free here. */ + const int scratch = TCG_REG_L1; switch (sizeop) { case 0: @@ -1398,8 +1391,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ - tcg_out_qemu_st_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); + tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1434,18 +1426,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, stack_adjust += 4; #else tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[1], data_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); + TCG_REG_L1, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index); stack_adjust = 0; /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], - TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); #endif tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); @@ -1472,11 +1460,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, use the ADDR32 prefix. For now, do nothing. */ if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, - tcg_target_call_iarg_regs[0], GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, - tcg_target_call_iarg_regs[0], base); - base = tcg_target_call_iarg_regs[0]; + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); + base = TCG_REG_L0; offset = 0; } } @@ -2061,15 +2047,17 @@ static void tcg_target_qemu_prologue(TCGContext *s) #if TCG_TARGET_REG_BITS == 32 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); - tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + + stack_addend); #else tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); -#endif tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); +#endif /* TB epilogue */ tb_ret_addr = s->code_ptr; diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index b356d76312..ace63ba37b 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -67,7 +67,11 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP #define TCG_TARGET_STACK_ALIGN 16 +#if defined(_WIN64) +#define TCG_TARGET_CALL_STACK_OFFSET 32 +#else #define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif /* optional instructions */ #define TCG_TARGET_HAS_div2_i32 1 diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 1745038f14..dc9c12cf18 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -176,12 +176,6 @@ static const int tcg_target_call_oarg_regs[] = { TCG_REG_R8 }; -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 8; -} - /* * opcode formation */ diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 74db83d440..f70910a90f 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -68,7 +68,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif /* check if we really need so many registers :P */ -static const int tcg_target_reg_alloc_order[] = { +static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, @@ -94,14 +94,14 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_V1 }; -static const int tcg_target_call_iarg_regs[4] = { +static const TCGReg tcg_target_call_iarg_regs[4] = { TCG_REG_A0, TCG_REG_A1, TCG_REG_A2, TCG_REG_A3 }; -static const int tcg_target_call_oarg_regs[2] = { +static const TCGReg tcg_target_call_oarg_regs[2] = { TCG_REG_V0, TCG_REG_V1 }; @@ -185,12 +185,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 4; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -278,6 +272,8 @@ static inline int tcg_target_const_match(tcg_target_long val, enum { OPC_BEQ = 0x04 << 26, OPC_BNE = 0x05 << 26, + OPC_BLEZ = 0x06 << 26, + OPC_BGTZ = 0x07 << 26, OPC_ADDIU = 0x09 << 26, OPC_SLTI = 0x0A << 26, OPC_SLTIU = 0x0B << 26, @@ -298,12 +294,16 @@ enum { OPC_SPECIAL = 0x00 << 26, OPC_SLL = OPC_SPECIAL | 0x00, OPC_SRL = OPC_SPECIAL | 0x02, + OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, OPC_SRA = OPC_SPECIAL | 0x03, OPC_SLLV = OPC_SPECIAL | 0x04, OPC_SRLV = OPC_SPECIAL | 0x06, + OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, OPC_SRAV = OPC_SPECIAL | 0x07, OPC_JR = OPC_SPECIAL | 0x08, OPC_JALR = OPC_SPECIAL | 0x09, + OPC_MOVZ = OPC_SPECIAL | 0x0A, + OPC_MOVN = OPC_SPECIAL | 0x0B, OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, @@ -319,7 +319,13 @@ enum { OPC_SLT = OPC_SPECIAL | 0x2A, OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_REGIMM = 0x01 << 26, + OPC_BLTZ = OPC_REGIMM | (0x00 << 16), + OPC_BGEZ = OPC_REGIMM | (0x01 << 16), + OPC_SPECIAL3 = 0x1f << 26, + OPC_INS = OPC_SPECIAL3 | 0x004, + OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, }; @@ -327,7 +333,8 @@ enum { /* * Type reg */ -static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt) +static inline void tcg_out_opc_reg(TCGContext *s, int opc, + TCGReg rd, TCGReg rs, TCGReg rt) { int32_t inst; @@ -341,7 +348,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int r /* * Type immediate */ -static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm) +static inline void tcg_out_opc_imm(TCGContext *s, int opc, + TCGReg rt, TCGReg rs, TCGArg imm) { int32_t inst; @@ -355,7 +363,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int i /* * Type branch */ -static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs) +static inline void tcg_out_opc_br(TCGContext *s, int opc, + TCGReg rt, TCGReg rs) { /* We pay attention here to not modify the branch target by reading the existing value and using it again. This ensure that caches and @@ -368,7 +377,8 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs) /* * Type sa */ -static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa) +static inline void tcg_out_opc_sa(TCGContext *s, int opc, + TCGReg rd, TCGReg rt, TCGArg sa) { int32_t inst; @@ -407,38 +417,47 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); +#else /* ret and arg can't be register at */ if (ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); +#else /* ret and arg can't be register at */ if (ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg) +static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) { +#ifdef _MIPS_ARCH_MIPS32R2 + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); +#else /* ret and arg must be different and can't be register at */ if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { tcg_abort(); @@ -456,9 +475,10 @@ static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +#endif } -static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) +static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) { #ifdef _MIPS_ARCH_MIPS32R2 tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); @@ -468,7 +488,7 @@ static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) #endif } -static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) +static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) { #ifdef _MIPS_ARCH_MIPS32R2 tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); @@ -478,8 +498,8 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) #endif } -static inline void tcg_out_ldst(TCGContext *s, int opc, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, + TCGReg arg1, TCGArg arg2) { if (arg2 == (int16_t) arg2) { tcg_out_opc_imm(s, opc, arg, arg1, arg2); @@ -502,7 +522,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); } -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) { if (val == (int16_t)val) { tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); @@ -543,7 +563,7 @@ DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg) #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG #define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ tcg_out_movi(s, TCG_TYPE_I32, A, arg); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg) +DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg) #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG /* We don't use the macro for this one to avoid an unnecessary reg-reg @@ -573,8 +593,8 @@ static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num, #endif } -static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, - int arg2, int label_index) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, int label_index) { TCGLabel *l = &s->labels[label_index]; @@ -586,32 +606,48 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, tcg_out_opc_br(s, OPC_BNE, arg1, arg2); break; case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); + tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_LTU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_GEU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_LEU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); break; case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 == 0) { + tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); + } else { + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); + tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + } break; case TCG_COND_GTU: tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); @@ -631,8 +667,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1, /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1, - int arg2, int arg3, int arg4, int label_index) +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, + TCGArg arg2, TCGArg arg3, TCGArg arg4, + int label_index) { void *label_ptr; @@ -694,8 +731,70 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1, reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret, - int arg1, int arg2) +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg c1, TCGArg c2, TCGArg v) +{ + switch (cond) { + case TCG_COND_EQ: + if (c1 == 0) { + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2); + } else if (c2 == 0) { + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); + } else { + tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + } + break; + case TCG_COND_NE: + if (c1 == 0) { + tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2); + } else if (c2 == 0) { + tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); + } else { + tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + } + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_GE: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_GEU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_LE: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_LEU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); + tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + break; + default: + tcg_abort(); + break; + } +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg arg1, TCGArg arg2) { switch (cond) { case TCG_COND_EQ: @@ -754,8 +853,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret, /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret, - int arg1, int arg2, int arg3, int arg4) +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4) { switch (cond) { case TCG_COND_EQ: @@ -842,18 +941,17 @@ static const void * const qemu_st_helpers[4] = { static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - int addr_regl, addr_meml; - int data_regl, data_regh, data_reg1, data_reg2; - int mem_index, s_bits; + TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; #if defined(CONFIG_SOFTMMU) void *label1_ptr, *label2_ptr; int arg_num; -#endif -#if TARGET_LONG_BITS == 64 -# if defined(CONFIG_SOFTMMU) + int mem_index, s_bits; + int addr_meml; +# if TARGET_LONG_BITS == 64 uint8_t *label3_ptr; + TCGReg addr_regh; + int addr_memh; # endif - int addr_regh, addr_memh; #endif data_regl = *args++; if (opc == 3) @@ -861,11 +959,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, else data_regh = 0; addr_regl = *args++; -#if TARGET_LONG_BITS == 64 +#if defined(CONFIG_SOFTMMU) +# if TARGET_LONG_BITS == 64 addr_regh = *args++; -#endif +# if defined(TCG_TARGET_WORDS_BIGENDIAN) + addr_memh = 0; + addr_meml = 4; +# else + addr_memh = 4; + addr_meml = 0; +# endif +# else + addr_meml = 0; +# endif mem_index = *args; s_bits = opc & 3; +#endif if (opc == 3) { #if defined(TCG_TARGET_WORDS_BIGENDIAN) @@ -879,18 +988,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, data_reg1 = data_regl; data_reg2 = 0; } -#if TARGET_LONG_BITS == 64 -# if defined(TCG_TARGET_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -#else - addr_meml = 0; -#endif - #if defined(CONFIG_SOFTMMU) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); @@ -1029,50 +1126,56 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - int addr_regl, addr_meml; - int data_regl, data_regh, data_reg1, data_reg2; - int mem_index, s_bits; + TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; #if defined(CONFIG_SOFTMMU) uint8_t *label1_ptr, *label2_ptr; int arg_num; + int mem_index, s_bits; + int addr_meml; #endif #if TARGET_LONG_BITS == 64 # if defined(CONFIG_SOFTMMU) uint8_t *label3_ptr; + TCGReg addr_regh; + int addr_memh; # endif - int addr_regh, addr_memh; #endif - data_regl = *args++; if (opc == 3) { data_regh = *args++; -#if defined(TCG_TARGET_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif } else { - data_reg1 = data_regl; - data_reg2 = 0; data_regh = 0; } addr_regl = *args++; -#if TARGET_LONG_BITS == 64 +#if defined(CONFIG_SOFTMMU) +# if TARGET_LONG_BITS == 64 addr_regh = *args++; -# if defined(TCG_TARGET_WORDS_BIGENDIAN) +# if defined(TCG_TARGET_WORDS_BIGENDIAN) addr_memh = 0; addr_meml = 4; -# else +# else addr_memh = 4; addr_meml = 0; -# endif -#else +# endif +# else addr_meml = 0; -#endif +# endif mem_index = *args; s_bits = opc; +#endif + + if (opc == 3) { +#if defined(TCG_TARGET_WORDS_BIGENDIAN) + data_reg1 = data_regh; + data_reg2 = data_regl; +#else + data_reg1 = data_regl; + data_reg2 = data_regh; +#endif + } else { + data_reg1 = data_regl; + data_reg2 = 0; + } #if defined(CONFIG_SOFTMMU) tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); @@ -1157,7 +1260,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, break; case 1: if (TCG_NEED_BSWAP) { - tcg_out_bswap16(s, TCG_REG_T0, data_reg1); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff); + tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0); } else { tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0); @@ -1377,6 +1481,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]); } break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32); + tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]); + tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]); + } else { + tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]); + } + break; + + /* The bswap routines do not work on non-R2 CPU. In that case + we let TCG generating the corresponding code. */ + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, args[0], args[1]); + break; case INDEX_op_ext8s_i32: tcg_out_ext8s(s, args[0], args[1]); @@ -1385,6 +1514,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ext16s(s, args[0], args[1]); break; + case INDEX_op_deposit_i32: + tcg_out_opc_imm(s, OPC_INS, args[0], args[2], + ((args[3] + args[4] - 1) << 11) | (args[3] << 6)); + break; + case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], args[1], args[3]); break; @@ -1392,6 +1526,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]); + break; + case INDEX_op_setcond_i32: tcg_out_setcond(s, args[3], args[0], args[1], args[2]); break; @@ -1453,34 +1591,42 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_st16_i32, { "rZ", "r" } }, { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "rZ", "rJZ" } }, + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i32, { "r", "rZ", "rIZ" } }, + { INDEX_op_and_i32, { "r", "rZ", "rI" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, { INDEX_op_not_i32, { "r", "rZ" } }, { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, - { INDEX_op_shl_i32, { "r", "rZ", "riZ" } }, - { INDEX_op_shr_i32, { "r", "rZ", "riZ" } }, - { INDEX_op_sar_i32, { "r", "rZ", "riZ" } }, + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, { INDEX_op_ext8s_i32, { "r", "rZ" } }, { INDEX_op_ext16s_i32, { "r", "rZ" } }, + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 @@ -1520,7 +1666,6 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_GP, TCG_REG_FP, TCG_REG_RA, /* should be last for ABI compliance */ }; @@ -1530,11 +1675,15 @@ static void tcg_target_qemu_prologue(TCGContext *s) { int i, frame_size; - /* reserve some stack space */ + /* reserve some stack space, also for TCG temps. */ frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE; + + TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); /* TB prologue */ tcg_out_addi(s, TCG_REG_SP, -frame_size); @@ -1586,8 +1735,7 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ tcg_add_target_add_op_defs(mips_op_defs); - tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf), - CPU_TEMP_BUF_NLONGS * sizeof(long)); } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 9c68a32582..d147e70eb1 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -80,17 +80,34 @@ typedef enum { #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 + +/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ +#if defined(_MIPS_ARCH_MIPS4) || defined(_MIPS_ARCH_MIPS32) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_LOONGSON2E) || \ + defined(_MIPS_ARCH_LOONGSON2F) +#define TCG_TARGET_HAS_movcond_i32 1 +#else #define TCG_TARGET_HAS_movcond_i32 0 +#endif + +/* optional instructions only implemented on MIPS32R2 */ +#ifdef _MIPS_ARCH_MIPS32R2 +#define TCG_TARGET_HAS_bswap16_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 +#define TCG_TARGET_HAS_rot_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#else +#define TCG_TARGET_HAS_bswap16_i32 0 +#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_deposit_i32 0 +#endif /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ diff --git a/tcg/optimize.c b/tcg/optimize.c index 1be7631672..35532a1e03 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -39,8 +39,6 @@ typedef enum { TCG_TEMP_UNDEF = 0, TCG_TEMP_CONST, TCG_TEMP_COPY, - TCG_TEMP_HAS_COPY, - TCG_TEMP_ANY } tcg_temp_state; struct tcg_temp_info { @@ -52,39 +50,19 @@ struct tcg_temp_info { static struct tcg_temp_info temps[TCG_MAX_TEMPS]; -/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some - class of equivalent temp's, a new representative should be chosen in this - class. */ -static void reset_temp(TCGArg temp, int nb_temps, int nb_globals) +/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove + the copy flag from the left temp. */ +static void reset_temp(TCGArg temp) { - int i; - TCGArg new_base = (TCGArg)-1; - if (temps[temp].state == TCG_TEMP_HAS_COPY) { - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { - if (i >= nb_globals) { - temps[i].state = TCG_TEMP_HAS_COPY; - new_base = i; - break; - } - } - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { - if (new_base == (TCGArg)-1) { - temps[i].state = TCG_TEMP_ANY; - } else { - temps[i].val = new_base; - } + if (temps[temp].state == TCG_TEMP_COPY) { + if (temps[temp].prev_copy == temps[temp].next_copy) { + temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; + } else { + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; } - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - } else if (temps[temp].state == TCG_TEMP_COPY) { - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - new_base = temps[temp].val; - } - temps[temp].state = TCG_TEMP_ANY; - if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) { - temps[new_base].state = TCG_TEMP_ANY; } + temps[temp].state = TCG_TEMP_UNDEF; } static int op_bits(TCGOpcode op) @@ -107,33 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op) } } -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src, - int nb_temps, int nb_globals) +static TCGArg find_better_copy(TCGContext *s, TCGArg temp) +{ + TCGArg i; + + /* If this is already a global, we can't do better. */ + if (temp < s->nb_globals) { + return temp; + } + + /* Search for a global first. */ + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { + if (i < s->nb_globals) { + return i; + } + } + + /* If it is a temp, search for a temp local. */ + if (!s->temps[temp].temp_local) { + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { + if (s->temps[i].temp_local) { + return i; + } + } + } + + /* Failure to find a better representation, return the same temp. */ + return temp; +} + +static bool temps_are_copies(TCGArg arg1, TCGArg arg2) { - reset_temp(dst, nb_temps, nb_globals); - assert(temps[src].state != TCG_TEMP_COPY); - if (src >= nb_globals) { - assert(temps[src].state != TCG_TEMP_CONST); - if (temps[src].state != TCG_TEMP_HAS_COPY) { - temps[src].state = TCG_TEMP_HAS_COPY; + TCGArg i; + + if (arg1 == arg2) { + return true; + } + + if (temps[arg1].state != TCG_TEMP_COPY + || temps[arg2].state != TCG_TEMP_COPY) { + return false; + } + + for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { + if (i == arg2) { + return true; + } + } + + return false; +} + +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, + TCGArg dst, TCGArg src) +{ + reset_temp(dst); + assert(temps[src].state != TCG_TEMP_CONST); + + if (s->temps[src].type == s->temps[dst].type) { + if (temps[src].state != TCG_TEMP_COPY) { + temps[src].state = TCG_TEMP_COPY; temps[src].next_copy = src; temps[src].prev_copy = src; } temps[dst].state = TCG_TEMP_COPY; - temps[dst].val = src; temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; } + gen_args[0] = dst; gen_args[1] = src; } -static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val, - int nb_temps, int nb_globals) +static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) { - reset_temp(dst, nb_temps, nb_globals); + reset_temp(dst); temps[dst].state = TCG_TEMP_CONST; temps[dst].val = val; gen_args[0] = dst; @@ -264,58 +292,88 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) return res; } +/* Return 2 if the condition can't be simplified, and the result + of the condition (0 or 1) if it can */ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, TCGArg y, TCGCond c) { - switch (op_bits(op)) { - case 32: + if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { + switch (op_bits(op)) { + case 32: + switch (c) { + case TCG_COND_EQ: + return (uint32_t)temps[x].val == (uint32_t)temps[y].val; + case TCG_COND_NE: + return (uint32_t)temps[x].val != (uint32_t)temps[y].val; + case TCG_COND_LT: + return (int32_t)temps[x].val < (int32_t)temps[y].val; + case TCG_COND_GE: + return (int32_t)temps[x].val >= (int32_t)temps[y].val; + case TCG_COND_LE: + return (int32_t)temps[x].val <= (int32_t)temps[y].val; + case TCG_COND_GT: + return (int32_t)temps[x].val > (int32_t)temps[y].val; + case TCG_COND_LTU: + return (uint32_t)temps[x].val < (uint32_t)temps[y].val; + case TCG_COND_GEU: + return (uint32_t)temps[x].val >= (uint32_t)temps[y].val; + case TCG_COND_LEU: + return (uint32_t)temps[x].val <= (uint32_t)temps[y].val; + case TCG_COND_GTU: + return (uint32_t)temps[x].val > (uint32_t)temps[y].val; + } + break; + case 64: + switch (c) { + case TCG_COND_EQ: + return (uint64_t)temps[x].val == (uint64_t)temps[y].val; + case TCG_COND_NE: + return (uint64_t)temps[x].val != (uint64_t)temps[y].val; + case TCG_COND_LT: + return (int64_t)temps[x].val < (int64_t)temps[y].val; + case TCG_COND_GE: + return (int64_t)temps[x].val >= (int64_t)temps[y].val; + case TCG_COND_LE: + return (int64_t)temps[x].val <= (int64_t)temps[y].val; + case TCG_COND_GT: + return (int64_t)temps[x].val > (int64_t)temps[y].val; + case TCG_COND_LTU: + return (uint64_t)temps[x].val < (uint64_t)temps[y].val; + case TCG_COND_GEU: + return (uint64_t)temps[x].val >= (uint64_t)temps[y].val; + case TCG_COND_LEU: + return (uint64_t)temps[x].val <= (uint64_t)temps[y].val; + case TCG_COND_GTU: + return (uint64_t)temps[x].val > (uint64_t)temps[y].val; + } + break; + } + } else if (temps_are_copies(x, y)) { switch (c) { - case TCG_COND_EQ: - return (uint32_t)x == (uint32_t)y; - case TCG_COND_NE: - return (uint32_t)x != (uint32_t)y; - case TCG_COND_LT: - return (int32_t)x < (int32_t)y; - case TCG_COND_GE: - return (int32_t)x >= (int32_t)y; - case TCG_COND_LE: - return (int32_t)x <= (int32_t)y; case TCG_COND_GT: - return (int32_t)x > (int32_t)y; case TCG_COND_LTU: - return (uint32_t)x < (uint32_t)y; - case TCG_COND_GEU: - return (uint32_t)x >= (uint32_t)y; - case TCG_COND_LEU: - return (uint32_t)x <= (uint32_t)y; + case TCG_COND_LT: case TCG_COND_GTU: - return (uint32_t)x > (uint32_t)y; - } - break; - case 64: - switch (c) { - case TCG_COND_EQ: - return (uint64_t)x == (uint64_t)y; case TCG_COND_NE: - return (uint64_t)x != (uint64_t)y; - case TCG_COND_LT: - return (int64_t)x < (int64_t)y; + return 0; case TCG_COND_GE: - return (int64_t)x >= (int64_t)y; + case TCG_COND_GEU: case TCG_COND_LE: - return (int64_t)x <= (int64_t)y; - case TCG_COND_GT: - return (int64_t)x > (int64_t)y; + case TCG_COND_LEU: + case TCG_COND_EQ: + return 1; + } + } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { + switch (c) { case TCG_COND_LTU: - return (uint64_t)x < (uint64_t)y; + return 0; case TCG_COND_GEU: - return (uint64_t)x >= (uint64_t)y; - case TCG_COND_LEU: - return (uint64_t)x <= (uint64_t)y; - case TCG_COND_GTU: - return (uint64_t)x > (uint64_t)y; + return 1; + default: + return 2; } - break; + } else { + return 2; } fprintf(stderr, @@ -324,7 +382,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, tcg_abort(); } - /* Propagate constants and copies, fold constant expressions. */ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_defs) @@ -338,10 +395,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. - If this temp is a copy of other ones then this equivalence class' - representative is kept in VALS' element. - If this temp is neither copy nor constant then corresponding VALS' - element is unused. */ + If this temp is a copy of other ones then the other copies are + available through the doubly linked circular list. */ nb_temps = s->nb_temps; nb_globals = s->nb_globals; @@ -353,11 +408,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, op = gen_opc_buf[op_index]; def = &tcg_op_defs[op]; /* Do copy propagation */ - if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) { - assert(op != INDEX_op_call); + if (op == INDEX_op_call) { + int nb_oargs = args[0] >> 16; + int nb_iargs = args[0] & 0xffff; + for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { + if (temps[args[i]].state == TCG_TEMP_COPY) { + args[i] = find_better_copy(s, args[i]); + } + } + } else { for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { if (temps[args[i]].state == TCG_TEMP_COPY) { - args[i] = temps[args[i]].val; + args[i] = find_better_copy(s, args[i]); } } } @@ -372,7 +434,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): - if (temps[args[1]].state == TCG_TEMP_CONST) { + /* Prefer the constant in second argument, and then the form + op a, a, b, which is better handled on non-RISC hosts. */ + if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2] + && temps[args[2]].state != TCG_TEMP_CONST)) { tmp = args[1]; args[1] = args[2]; args[2] = tmp; @@ -429,7 +494,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], 0); args += 3; gen_args += 2; continue; @@ -456,14 +521,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } if (temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0) { - if ((temps[args[0]].state == TCG_TEMP_COPY - && temps[args[0]].val == args[1]) - || args[0] == args[1]) { + if (temps_are_copies(args[0], args[1])) { gen_opc_buf[op_index] = INDEX_op_nop; } else { gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(gen_args, args[0], args[1], - nb_temps, nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; } args += 3; @@ -481,7 +543,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], 0); args += 3; gen_args += 2; continue; @@ -495,13 +557,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, switch (op) { CASE_OP_32_64(or): CASE_OP_32_64(and): - if (args[1] == args[2]) { - if (args[1] == args[0]) { + if (temps_are_copies(args[1], args[2])) { + if (temps_are_copies(args[0], args[1])) { gen_opc_buf[op_index] = INDEX_op_nop; } else { gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps, - nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; } args += 3; @@ -512,21 +573,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; } + /* Simplify expression for "op r, a, a => movi r, 0" cases */ + switch (op) { + CASE_OP_32_64(sub): + CASE_OP_32_64(xor): + if (temps_are_copies(args[1], args[2])) { + gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], 0); + gen_args += 2; + args += 3; + continue; + } + break; + default: + break; + } + /* Propagate constants through copy operations and do constant folding. Constants will be substituted to arguments by register allocator where needed and possible. Also detect copies. */ switch (op) { CASE_OP_32_64(mov): - if ((temps[args[1]].state == TCG_TEMP_COPY - && temps[args[1]].val == args[0]) - || args[0] == args[1]) { + if (temps_are_copies(args[0], args[1])) { args += 2; gen_opc_buf[op_index] = INDEX_op_nop; break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(gen_args, args[0], args[1], - nb_temps, nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[1]); gen_args += 2; args += 2; break; @@ -538,7 +612,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], args[1]); gen_args += 2; args += 2; break; @@ -553,9 +627,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (temps[args[1]].state == TCG_TEMP_CONST) { gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], tmp); } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; } @@ -583,10 +657,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -594,16 +668,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } args += 3; break; - CASE_OP_32_64(setcond): + CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { gen_opc_buf[op_index] = op_to_movi(op); - tmp = do_constant_folding_cond(op, temps[args[1]].val, - temps[args[2]].val, args[3]); - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); + tmp = ((1ull << args[4]) - 1); + tmp = (temps[args[1]].val & ~(tmp << args[3])) + | ((temps[args[2]].val & tmp) << args[3]); + tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args[2] = args[2]; + gen_args[3] = args[3]; + gen_args[4] = args[4]; + gen_args += 5; + } + args += 5; + break; + CASE_OP_32_64(setcond): + tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); + if (tmp != 2) { + gen_opc_buf[op_index] = op_to_movi(op); + tcg_opt_gen_movi(gen_args, args[0], tmp); + gen_args += 2; + } else { + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -613,10 +705,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args += 4; break; CASE_OP_32_64(brcond): - if (temps[args[0]].state == TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST) { - if (do_constant_folding_cond(op, temps[args[0]].val, - temps[args[1]].val, args[2])) { + tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); + if (tmp != 2) { + if (tmp) { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); gen_opc_buf[op_index] = INDEX_op_br; gen_args[0] = args[3]; @@ -626,7 +717,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } } else { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -636,27 +727,21 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args += 4; break; CASE_OP_32_64(movcond): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding_cond(op, temps[args[1]].val, - temps[args[2]].val, args[5]); - if (args[0] == args[4-tmp] - || (temps[args[4-tmp]].state == TCG_TEMP_COPY - && temps[args[4-tmp]].val == args[0])) { + tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); + if (tmp != 2) { + if (temps_are_copies(args[0], args[4-tmp])) { gen_opc_buf[op_index] = INDEX_op_nop; } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { gen_opc_buf[op_index] = op_to_movi(op); - tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val, - nb_temps, nb_globals); + tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); gen_args += 2; } else { gen_opc_buf[op_index] = op_to_mov(op); - tcg_opt_gen_mov(gen_args, args[0], args[4-tmp], - nb_temps, nb_globals); + tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); gen_args += 2; } } else { - reset_temp(args[0], nb_temps, nb_globals); + reset_temp(args[0]); gen_args[0] = args[0]; gen_args[1] = args[1]; gen_args[2] = args[2]; @@ -671,11 +756,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { for (i = 0; i < nb_globals; i++) { - reset_temp(i, nb_temps, nb_globals); + reset_temp(i); } } for (i = 0; i < (args[0] >> 16); i++) { - reset_temp(args[i + 1], nb_temps, nb_globals); + reset_temp(args[i + 1]); } i = nb_call_args + 3; while (i) { @@ -694,7 +779,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); } else { for (i = 0; i < def->nb_oargs; i++) { - reset_temp(args[i], nb_temps, nb_globals); + reset_temp(args[i]); } } for (i = 0; i < def->nb_args; i++) { diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 26c4b33e60..90c275d698 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -221,12 +221,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return ARRAY_SIZE (tcg_target_call_iarg_regs); -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -390,6 +384,7 @@ static int tcg_target_const_match(tcg_target_long val, #define ORC XO31(412) #define EQV XO31(284) #define NAND XO31(476) +#define ISEL XO31( 15) #define LBZX XO31( 87) #define LHZX XO31(279) @@ -1269,6 +1264,72 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args, ); } +static void tcg_out_movcond (TCGContext *s, TCGCond cond, + TCGArg dest, + TCGArg c1, TCGArg c2, + TCGArg v1, TCGArg v2, + int const_c2) +{ + tcg_out_cmp (s, cond, c1, c2, const_c2, 7); + + if (1) { + /* At least here on 7747A bit twiddling hacks are outperformed + by jumpy code (the testing was not scientific) */ + if (dest == v2) { + cond = tcg_invert_cond (cond); + v2 = v1; + } + else { + if (dest != v1) { + tcg_out_mov (s, TCG_TYPE_I32, dest, v1); + } + } + /* Branch forward over one insn */ + tcg_out32 (s, tcg_to_bc[cond] | 8); + tcg_out_mov (s, TCG_TYPE_I32, dest, v2); + } + else { + /* isel version, "if (1)" above should be replaced once a way + to figure out availability of isel on the underlying + hardware is found */ + int tab, bc; + + switch (cond) { + case TCG_COND_EQ: + tab = TAB (dest, v1, v2); + bc = CR_EQ; + break; + case TCG_COND_NE: + tab = TAB (dest, v2, v1); + bc = CR_EQ; + break; + case TCG_COND_LTU: + case TCG_COND_LT: + tab = TAB (dest, v1, v2); + bc = CR_LT; + break; + case TCG_COND_GEU: + case TCG_COND_GE: + tab = TAB (dest, v2, v1); + bc = CR_LT; + break; + case TCG_COND_LEU: + case TCG_COND_LE: + tab = TAB (dest, v2, v1); + bc = CR_GT; + break; + case TCG_COND_GTU: + case TCG_COND_GT: + tab = TAB (dest, v1, v2); + bc = CR_GT; + break; + default: + tcg_abort (); + } + tcg_out32 (s, ISEL | tab | ((bc + 28) << 6)); + } +} + static void tcg_out_brcond (TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) @@ -1826,6 +1887,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, ); break; + case INDEX_op_movcond_i32: + tcg_out_movcond (s, args[5], args[0], + args[1], args[2], + args[3], args[4], + const_args[2]); + break; + default: tcg_dump_ops (s); tcg_abort (); @@ -1922,6 +1990,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext16u_i32, { "r", "r" } }, { INDEX_op_deposit_i32, { "r", "0", "r" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } }, { -1 }, }; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 177eea1d79..3259d898ab 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -92,7 +92,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 337cd419f8..19944bc427 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -208,12 +208,6 @@ static void patch_reloc (uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static int tcg_target_get_call_iarg_regs_count (int flags) -{ - return ARRAY_SIZE (tcg_target_call_iarg_regs); -} - /* parse target specific constraints */ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index aac11d9b61..3b90605fb1 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -356,11 +356,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return sizeof(tcg_target_call_iarg_regs) / sizeof(int); -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 876da4f6b9..8fd7f86de0 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -155,12 +155,6 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } -/* maximum number of register used for input function arguments */ -static inline int tcg_target_get_call_iarg_regs_count(int flags) -{ - return 6; -} - /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -89,7 +89,6 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, tcg_target_long arg2); static int tcg_target_const_match(tcg_target_long val, const TCGArgConstraint *arg_ct); -static int tcg_target_get_call_iarg_regs_count(int flags); TCGOpDef tcg_op_defs[] = { #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, @@ -937,11 +936,7 @@ void tcg_dump_ops(TCGContext *s) args[nb_oargs + i])); } } - } else if (c == INDEX_op_movi_i32 -#if TCG_TARGET_REG_BITS == 64 - || c == INDEX_op_movi_i64 -#endif - ) { + } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) { tcg_target_ulong val; TCGHelperInfo *th; @@ -993,14 +988,11 @@ void tcg_dump_ops(TCGContext *s) case INDEX_op_brcond_i32: case INDEX_op_setcond_i32: case INDEX_op_movcond_i32: -#if TCG_TARGET_REG_BITS == 32 case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: -#else case INDEX_op_brcond_i64: case INDEX_op_setcond_i64: case INDEX_op_movcond_i64: -#endif if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { qemu_log(",%s", cond_name[args[k++]]); } else { @@ -1861,7 +1853,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, flags = args[nb_oargs + nb_iargs]; - nb_regs = tcg_target_get_call_iarg_regs_count(flags); + nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); if (nb_regs > nb_params) nb_regs = nb_params; @@ -2103,16 +2095,12 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, #endif switch(opc) { case INDEX_op_mov_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_mov_i64: -#endif dead_args = s->op_dead_args[op_index]; tcg_reg_alloc_mov(s, def, args, dead_args); break; case INDEX_op_movi_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_movi_i64: -#endif tcg_reg_alloc_movi(s, args); break; case INDEX_op_debug_insn_start: diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 003244cb0b..3f4a24bb8b 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -863,12 +863,6 @@ static int tcg_target_const_match(tcg_target_long val, return arg_ct->ct & TCG_CT_CONST; } -/* Maximum number of register used for input function arguments. */ -static int tcg_target_get_call_iarg_regs_count(int flags) -{ - return ARRAY_SIZE(tcg_target_call_iarg_regs); -} - static void tcg_target_init(TCGContext *s) { #if defined(CONFIG_DEBUG_TCG_INTERPRETER) |