diff options
-rw-r--r-- | tcg/i386/tcg-target.c | 78 |
1 files changed, 48 insertions, 30 deletions
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 5e8c58b8c8..da7c3581ac 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -160,7 +160,12 @@ static inline int tcg_target_const_match(tcg_target_long val, #define OPC_ARITH_EvIz (0x81) #define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) #define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_INC_r32 (0x40) #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ #define OPC_JCC_short (0x70) /* ... plus condition code */ #define OPC_JMP_long (0xe9) @@ -175,6 +180,7 @@ static inline int tcg_target_const_match(tcg_target_long val, #define OPC_SHIFT_1 (0xd1) #define OPC_SHIFT_Ib (0xc1) #define OPC_SHIFT_cl (0xd3) +#define OPC_TESTL (0x85) /* Group 1 opcode extensions for 0x80-0x83. */ #define ARITH_ADD 0 @@ -275,6 +281,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, } } +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src); +} + static inline void tcg_out_mov(TCGContext *s, int ret, int arg) { if (arg != ret) { @@ -286,8 +298,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, int ret, int32_t arg) { if (arg == 0) { - /* xor r0,r0 */ - tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret); + tgen_arithr(s, ARITH_XOR, ret, ret); } else { tcg_out8(s, 0xb8 + ret); tcg_out32(s, arg); @@ -353,14 +364,15 @@ static inline void tcg_out_ext16s(TCGContext *s, int dest, int src) tcg_out_modrm(s, OPC_MOVSWL, dest, src); } -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf) +static inline void tgen_arithi(TCGContext *s, int c, int r0, + int32_t val, int cf) { - if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) { - /* inc */ - tcg_out_opc(s, 0x40 + r0); - } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) { - /* dec */ - tcg_out_opc(s, 0x48 + r0); + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32); + tcg_out_opc(s, opc + r0); } else if (val == (int8_t)val) { tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0); tcg_out8(s, val); @@ -433,12 +445,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, if (const_arg2) { if (arg2 == 0) { /* test r, r */ - tcg_out_modrm(s, 0x85, arg1, arg1); + tcg_out_modrm(s, OPC_TESTL, arg1, arg1); } else { tgen_arithi(s, ARITH_CMP, arg1, arg2, 0); } } else { - tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1); + tgen_arithr(s, ARITH_CMP, arg1, arg2); } } @@ -653,7 +665,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read)); /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0); tcg_out_mov(s, r0, addr_reg); @@ -669,7 +681,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, s->code_ptr++; /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); /* je label1 */ tcg_out8(s, OPC_JCC_short + JCC_JE); @@ -728,7 +740,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, *label1_ptr = s->code_ptr - label1_ptr - 1; /* add x(r1), r0 */ - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, + offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read)); #else r0 = addr_reg; @@ -845,7 +858,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write)); /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0); tcg_out_mov(s, r0, addr_reg); @@ -861,7 +874,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, s->code_ptr++; /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); /* je label1 */ tcg_out8(s, OPC_JCC_short + JCC_JE); @@ -942,7 +955,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, *label1_ptr = s->code_ptr - label1_ptr - 1; /* add x(r1), r0 */ - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, + offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write)); #else r0 = addr_reg; @@ -1094,7 +1108,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, if (const_args[2]) { tgen_arithi(s, c, args[0], args[2], 0); } else { - tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]); + tgen_arithr(s, c, args[0], args[2]); } break; case INDEX_op_mul_i32: @@ -1144,24 +1158,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, goto gen_shift32; case INDEX_op_add2_i32: - if (const_args[4]) + if (const_args[4]) { tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]); - if (const_args[5]) + } else { + tgen_arithr(s, ARITH_ADD, args[0], args[4]); + } + if (const_args[5]) { tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]); + } else { + tgen_arithr(s, ARITH_ADC, args[1], args[5]); + } break; case INDEX_op_sub2_i32: - if (const_args[4]) + if (const_args[4]) { tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]); - if (const_args[5]) + } else { + tgen_arithr(s, ARITH_SUB, args[0], args[4]); + } + if (const_args[5]) { tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]); + } else { + tgen_arithr(s, ARITH_SBB, args[1], args[5]); + } break; case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], |