author     Blue Swirl <blauwirbel@gmail.com>  2013-02-23 17:21:41 +0000
committer  Blue Swirl <blauwirbel@gmail.com>  2013-02-23 17:21:41 +0000
commit     f708e736d0dafc05f8b7e9e73d6440c930b94686 (patch)
tree       e9239dde2923de6c0c8cab983d93969d08480df3
parent     6ab7e5465a4d6188e29398fb43a30dbab1015b75 (diff)
parent     f437d0a3c24e471a855da33a086fe529e09a06af (diff)
Merge branch 'eflags3' of git://github.com/rth7680/qemu
* 'eflags3' of git://github.com/rth7680/qemu: (61 commits)
target-i386: Use movcond to implement shiftd.
target-i386: Discard CC_OP computation in set_cc_op also
target-i386: Use movcond to implement rotate flags.
target-i386: Use movcond to implement shift flags.
target-i386: Add CC_OP_CLR
target-i386: Implement tzcnt and fix lzcnt
target-i386: Use clz/ctz for bsf/bsr helpers
target-i386: Implement ADX extension
target-i386: Implement RORX
target-i386: Implement SHLX, SARX, SHRX
target-i386: Implement PDEP, PEXT
target-i386: Implement MULX
target-i386: Implement BZHI
target-i386: Implement BLSR, BLSMSK, BLSI
target-i386: Implement BEXTR
target-i386: Implement ANDN
target-i386: Implement MOVBE
target-i386: Decode the VEX prefixes
target-i386: Tidy prefix parsing
target-i386: Use CC_SRC2 for ADC and SBB
...
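
Among the commits above, "Implement PDEP, PEXT" adds the BMI2 bit deposit/extract
instructions. As a minimal standalone sketch of their semantics, the loops below
mirror helper_pdep() and helper_pext() from int_helper.c in the diff that follows;
the pdep64/pext64 names and the test values are illustrative only, not part of the
patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Deposit the low bits of src into the set-bit positions of mask,
       mirroring the loop of helper_pdep() below. */
    static uint64_t pdep64(uint64_t src, uint64_t mask)
    {
        uint64_t dest = 0;
        for (int i = 0; mask != 0; i++) {
            int o = __builtin_ctzll(mask);   /* lowest set bit of mask */
            mask &= mask - 1;                /* clear that bit */
            dest |= ((src >> i) & 1) << o;   /* place the next source bit there */
        }
        return dest;
    }

    /* Gather the bits of src selected by mask into the low bits of the
       result, mirroring helper_pext() below. */
    static uint64_t pext64(uint64_t src, uint64_t mask)
    {
        uint64_t dest = 0;
        for (int o = 0; mask != 0; o++) {
            int i = __builtin_ctzll(mask);
            mask &= mask - 1;
            dest |= ((src >> i) & 1) << o;
        }
        return dest;
    }

    int main(void)
    {
        /* 0xf0f0 selects two nibbles; PDEP spreads 0xab across them,
           and PEXT inverts the operation. */
        printf("pdep = 0x%llx\n", (unsigned long long)pdep64(0xab, 0xf0f0));   /* 0xa0b0 */
        printf("pext = 0x%llx\n", (unsigned long long)pext64(0xa0b0, 0xf0f0)); /* 0xab */
        return 0;
    }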
-rw-r--r--  target-i386/cc_helper.c               260
-rw-r--r--  target-i386/cc_helper_template.h      261
-rw-r--r--  target-i386/cpu.c                      18
-rw-r--r--  target-i386/cpu.h                      26
-rw-r--r--  target-i386/helper.c                   13
-rw-r--r--  target-i386/helper.h                   13
-rw-r--r--  target-i386/int_helper.c               69
-rw-r--r--  target-i386/shift_helper_template.h    12
-rw-r--r--  target-i386/translate.c              2637
-rw-r--r--  tests/tcg/test-i386.c                  10
10 files changed, 1870 insertions(+), 1449 deletions(-)
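
The bulk of the series reworks QEMU's lazy condition-code scheme: the cc helpers
now take CC_DST/CC_SRC/CC_SRC2 as plain values instead of reading them out of
CPUX86State. As a minimal sketch of the idea, the function below recovers the
flags of an 8-bit ADD from only dst and src1, following compute_all_addb() in
cc_helper_template.h in the diff below; the standalone flags_of_add8 wrapper,
the local CC_* defines, and the example values are assumptions for illustration
(PF is omitted for brevity):

    #include <stdint.h>
    #include <stdio.h>

    /* EFLAGS bit positions, as in the CC_* constants of cpu.h. */
    #define CC_C 0x0001
    #define CC_A 0x0010
    #define CC_Z 0x0040
    #define CC_S 0x0080
    #define CC_O 0x0800

    /* Recover all flags of an 8-bit ADD from dst and src1 alone:
       src2 is rederived as dst - src1, so nothing else needs to be
       stored, which is what compute_all_addb() exploits. */
    static int flags_of_add8(uint8_t dst, uint8_t src1)
    {
        uint8_t src2 = dst - src1;
        int cf = (dst < src1) ? CC_C : 0;   /* unsigned overflow */
        int zf = (dst == 0) * CC_Z;
        int sf = (dst & 0x80) ? CC_S : 0;
        int af = (dst ^ src1 ^ src2) & CC_A;
        int of = (((src1 ^ src2 ^ -1) & (src1 ^ dst)) & 0x80) ? CC_O : 0;
        return cf | zf | sf | af | of;
    }

    int main(void)
    {
        /* 0x7f + 1 = 0x80: signed overflow (OF, SF, AF set), no carry. */
        printf("flags = %#x\n", flags_of_add8(0x80, 0x7f));  /* 0x890 */
        return 0;
    }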
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 9422003f24..9daa1a06b8 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,243 +75,247 @@ const uint8_t parity_table[256] = {
 #endif

-static int compute_all_eflags(CPUX86State *env)
+static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
 {
-    return CC_SRC;
+    return (src1 & ~CC_C) | (dst * CC_C);
 }

-static int compute_c_eflags(CPUX86State *env)
+static target_ulong compute_all_adox(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
 {
-    return CC_SRC & CC_C;
+    return (src1 & ~CC_O) | (src2 * CC_O);
 }

-uint32_t helper_cc_compute_all(CPUX86State *env, int op)
+static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
+                                      target_ulong src2)
+{
+    return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
+}
+
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
+                                   target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
         return 0;

     case CC_OP_EFLAGS:
-        return compute_all_eflags(env);
+        return src1;
+    case CC_OP_CLR:
+        return CC_Z;

     case CC_OP_MULB:
-        return compute_all_mulb(env);
+        return compute_all_mulb(dst, src1);
     case CC_OP_MULW:
-        return compute_all_mulw(env);
+        return compute_all_mulw(dst, src1);
     case CC_OP_MULL:
-        return compute_all_mull(env);
+        return compute_all_mull(dst, src1);

     case CC_OP_ADDB:
-        return compute_all_addb(env);
+        return compute_all_addb(dst, src1);
     case CC_OP_ADDW:
-        return compute_all_addw(env);
+        return compute_all_addw(dst, src1);
     case CC_OP_ADDL:
-        return compute_all_addl(env);
+        return compute_all_addl(dst, src1);

     case CC_OP_ADCB:
-        return compute_all_adcb(env);
+        return compute_all_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_all_adcw(env);
+        return compute_all_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_all_adcl(env);
+        return compute_all_adcl(dst, src1, src2);

     case CC_OP_SUBB:
-        return compute_all_subb(env);
+        return compute_all_subb(dst, src1);
     case CC_OP_SUBW:
-        return compute_all_subw(env);
+        return compute_all_subw(dst, src1);
     case CC_OP_SUBL:
-        return compute_all_subl(env);
+        return compute_all_subl(dst, src1);

     case CC_OP_SBBB:
-        return compute_all_sbbb(env);
+        return compute_all_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_all_sbbw(env);
+        return compute_all_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_all_sbbl(env);
+        return compute_all_sbbl(dst, src1, src2);

     case CC_OP_LOGICB:
-        return compute_all_logicb(env);
+        return compute_all_logicb(dst, src1);
     case CC_OP_LOGICW:
-        return compute_all_logicw(env);
+        return compute_all_logicw(dst, src1);
     case CC_OP_LOGICL:
-        return compute_all_logicl(env);
+        return compute_all_logicl(dst, src1);

     case CC_OP_INCB:
-        return compute_all_incb(env);
+        return compute_all_incb(dst, src1);
     case CC_OP_INCW:
-        return compute_all_incw(env);
+        return compute_all_incw(dst, src1);
     case CC_OP_INCL:
-        return compute_all_incl(env);
+        return compute_all_incl(dst, src1);

     case CC_OP_DECB:
-        return compute_all_decb(env);
+        return compute_all_decb(dst, src1);
     case CC_OP_DECW:
-        return compute_all_decw(env);
+        return compute_all_decw(dst, src1);
     case CC_OP_DECL:
-        return compute_all_decl(env);
+        return compute_all_decl(dst, src1);

     case CC_OP_SHLB:
-        return compute_all_shlb(env);
+        return compute_all_shlb(dst, src1);
     case CC_OP_SHLW:
-        return compute_all_shlw(env);
+        return compute_all_shlw(dst, src1);
     case CC_OP_SHLL:
-        return compute_all_shll(env);
+        return compute_all_shll(dst, src1);

     case CC_OP_SARB:
-        return compute_all_sarb(env);
+        return compute_all_sarb(dst, src1);
     case CC_OP_SARW:
-        return compute_all_sarw(env);
+        return compute_all_sarw(dst, src1);
     case CC_OP_SARL:
-        return compute_all_sarl(env);
+        return compute_all_sarl(dst, src1);
+
+    case CC_OP_BMILGB:
+        return compute_all_bmilgb(dst, src1);
+    case CC_OP_BMILGW:
+        return compute_all_bmilgw(dst, src1);
+    case CC_OP_BMILGL:
+        return compute_all_bmilgl(dst, src1);
+
+    case CC_OP_ADCX:
+        return compute_all_adcx(dst, src1, src2);
+    case CC_OP_ADOX:
+        return compute_all_adox(dst, src1, src2);
+    case CC_OP_ADCOX:
+        return compute_all_adcox(dst, src1, src2);

 #ifdef TARGET_X86_64
     case CC_OP_MULQ:
-        return compute_all_mulq(env);
-
+        return compute_all_mulq(dst, src1);
     case CC_OP_ADDQ:
-        return compute_all_addq(env);
-
+        return compute_all_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_all_adcq(env);
-
+        return compute_all_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
-        return compute_all_subq(env);
-
+        return compute_all_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_all_sbbq(env);
-
+        return compute_all_sbbq(dst, src1, src2);
     case CC_OP_LOGICQ:
-        return compute_all_logicq(env);
-
+        return compute_all_logicq(dst, src1);
     case CC_OP_INCQ:
-        return compute_all_incq(env);
-
+        return compute_all_incq(dst, src1);
     case CC_OP_DECQ:
-        return compute_all_decq(env);
-
+        return compute_all_decq(dst, src1);
     case CC_OP_SHLQ:
-        return compute_all_shlq(env);
-
+        return compute_all_shlq(dst, src1);
     case CC_OP_SARQ:
-        return compute_all_sarq(env);
+        return compute_all_sarq(dst, src1);
+    case CC_OP_BMILGQ:
+        return compute_all_bmilgq(dst, src1);
 #endif
     }
 }

 uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
 {
-    return helper_cc_compute_all(env, op);
+    return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
 }

-uint32_t helper_cc_compute_c(CPUX86State *env, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
+                                 target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
+    case CC_OP_CLR:
         return 0;

     case CC_OP_EFLAGS:
-        return compute_c_eflags(env);
+    case CC_OP_SARB:
+    case CC_OP_SARW:
+    case CC_OP_SARL:
+    case CC_OP_SARQ:
+    case CC_OP_ADOX:
+        return src1 & 1;
+
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+        return src1;

     case CC_OP_MULB:
-        return compute_c_mull(env);
     case CC_OP_MULW:
-        return compute_c_mull(env);
     case CC_OP_MULL:
-        return compute_c_mull(env);
+    case CC_OP_MULQ:
+        return src1 != 0;
+
+    case CC_OP_ADCX:
+    case CC_OP_ADCOX:
+        return dst;

     case CC_OP_ADDB:
-        return compute_c_addb(env);
+        return compute_c_addb(dst, src1);
     case CC_OP_ADDW:
-        return compute_c_addw(env);
+        return compute_c_addw(dst, src1);
     case CC_OP_ADDL:
-        return compute_c_addl(env);
+        return compute_c_addl(dst, src1);

     case CC_OP_ADCB:
-        return compute_c_adcb(env);
+        return compute_c_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_c_adcw(env);
+        return compute_c_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_c_adcl(env);
+        return compute_c_adcl(dst, src1, src2);

     case CC_OP_SUBB:
-        return compute_c_subb(env);
+        return compute_c_subb(dst, src1);
     case CC_OP_SUBW:
-        return compute_c_subw(env);
+        return compute_c_subw(dst, src1);
     case CC_OP_SUBL:
-        return compute_c_subl(env);
+        return compute_c_subl(dst, src1);

     case CC_OP_SBBB:
-        return compute_c_sbbb(env);
+        return compute_c_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_c_sbbw(env);
+        return compute_c_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_c_sbbl(env);
-
-    case CC_OP_LOGICB:
-        return compute_c_logicb();
-    case CC_OP_LOGICW:
-        return compute_c_logicw();
-    case CC_OP_LOGICL:
-        return compute_c_logicl();
-
-    case CC_OP_INCB:
-        return compute_c_incl(env);
-    case CC_OP_INCW:
-        return compute_c_incl(env);
-    case CC_OP_INCL:
-        return compute_c_incl(env);
-
-    case CC_OP_DECB:
-        return compute_c_incl(env);
-    case CC_OP_DECW:
-        return compute_c_incl(env);
-    case CC_OP_DECL:
-        return compute_c_incl(env);
+        return compute_c_sbbl(dst, src1, src2);

     case CC_OP_SHLB:
-        return compute_c_shlb(env);
+        return compute_c_shlb(dst, src1);
     case CC_OP_SHLW:
-        return compute_c_shlw(env);
+        return compute_c_shlw(dst, src1);
     case CC_OP_SHLL:
-        return compute_c_shll(env);
+        return compute_c_shll(dst, src1);

-    case CC_OP_SARB:
-        return compute_c_sarl(env);
-    case CC_OP_SARW:
-        return compute_c_sarl(env);
-    case CC_OP_SARL:
-        return compute_c_sarl(env);
+    case CC_OP_BMILGB:
+        return compute_c_bmilgb(dst, src1);
+    case CC_OP_BMILGW:
+        return compute_c_bmilgw(dst, src1);
+    case CC_OP_BMILGL:
+        return compute_c_bmilgl(dst, src1);

 #ifdef TARGET_X86_64
-    case CC_OP_MULQ:
-        return compute_c_mull(env);
-
     case CC_OP_ADDQ:
-        return compute_c_addq(env);
-
+        return compute_c_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_c_adcq(env);
-
+        return compute_c_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
-        return compute_c_subq(env);
-
+        return compute_c_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_c_sbbq(env);
-
-    case CC_OP_LOGICQ:
-        return compute_c_logicq();
-
-    case CC_OP_INCQ:
-        return compute_c_incl(env);
-
-    case CC_OP_DECQ:
-        return compute_c_incl(env);
-
+        return compute_c_sbbq(dst, src1, src2);
     case CC_OP_SHLQ:
-        return compute_c_shlq(env);
-
-    case CC_OP_SARQ:
-        return compute_c_sarl(env);
+        return compute_c_shlq(dst, src1);
+    case CC_OP_BMILGQ:
+        return compute_c_bmilgq(dst, src1);
 #endif
     }
 }
@@ -326,7 +330,7 @@ target_ulong helper_read_eflags(CPUX86State *env)
 {
     uint32_t eflags;

-    eflags = helper_cc_compute_all(env, CC_OP);
+    eflags = cpu_cc_compute_all(env, CC_OP);
     eflags |= (DF & DF_MASK);
     eflags |= env->eflags & ~(VM_MASK | RF_MASK);
     return eflags;
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 1f94e11dcf..607311f195 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -18,258 +18,223 @@
  */

 #define DATA_BITS (1 << (3 + SHIFT))
-#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))

 #if DATA_BITS == 8
 #define SUFFIX b
 #define DATA_TYPE uint8_t
-#define DATA_MASK 0xff
 #elif DATA_BITS == 16
 #define SUFFIX w
 #define DATA_TYPE uint16_t
-#define DATA_MASK 0xffff
 #elif DATA_BITS == 32
 #define SUFFIX l
 #define DATA_TYPE uint32_t
-#define DATA_MASK 0xffffffff
 #elif DATA_BITS == 64
 #define SUFFIX q
 #define DATA_TYPE uint64_t
-#define DATA_MASK 0xffffffffffffffffULL
 #else
 #error unhandled operand size
 #endif

+#define SIGN_MASK (((DATA_TYPE)1) << (DATA_BITS - 1))
+
 /* dynamic flags computation */

-static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_SRC;
-    src2 = CC_DST - CC_SRC;
-    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src2 = dst - src1;
+
+    cf = dst < src1;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
-    int cf;
-    target_long src1;
-
-    src1 = CC_SRC;
-    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
-    return cf;
+    return dst < src1;
 }

-static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_SRC;
-    src2 = CC_DST - CC_SRC - 1;
-    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src2 = dst - src1 - src3;
+
+    cf = (src3 ? dst <= src1 : dst < src1);
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & 0x10;
+    zf = (dst == 0) << 6;
+    sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                       DATA_TYPE src3)
 {
-    int cf;
-    target_long src1;
-
-    src1 = CC_SRC;
-    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
-    return cf;
+    return src3 ? dst <= src1 : dst < src1;
 }

-static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_DST + CC_SRC;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src1 = dst + src2;
+
+    cf = src1 < src2;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
-    int cf;
-    target_long src1, src2;
+    DATA_TYPE src1 = dst + src2;

-    src1 = CC_DST + CC_SRC;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
-    return cf;
+    return src1 < src2;
 }

-static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_DST + CC_SRC + 1;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src1 = dst + src2 + src3;
+
+    cf = (src3 ? src1 <= src2 : src1 < src2);
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & 0x10;
+    zf = (dst == 0) << 6;
+    sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                       DATA_TYPE src3)
 {
-    int cf;
-    target_long src1, src2;
+    DATA_TYPE src1 = dst + src2 + src3;

-    src1 = CC_DST + CC_SRC + 1;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
-    return cf;
+    return (src3 ? src1 <= src2 : src1 < src2);
 }

-static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;

     cf = 0;
-    pf = parity_table[(uint8_t)CC_DST];
+    pf = parity_table[(uint8_t)dst];
     af = 0;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
     of = 0;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_logic, SUFFIX)(void)
-{
-    return 0;
-}
-
-static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
+    DATA_TYPE src2;

-    src1 = CC_DST - 1;
+    cf = src1;
+    src1 = dst - 1;
     src2 = 1;
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = (dst == SIGN_MASK) * CC_O;
     return cf | pf | af | zf | sf | of;
 }

-#if DATA_BITS == 32
-static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
-{
-    return CC_SRC;
-}
-#endif
-
-static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
+    DATA_TYPE src2;

-    src1 = CC_DST + 1;
+    cf = src1;
+    src1 = dst + 1;
     src2 = 1;
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = (dst == SIGN_MASK - 1) * CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;

-    cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = (src1 >> (DATA_BITS - 1)) & CC_C;
+    pf = parity_table[(uint8_t)dst];
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    /* of is defined if shift count == 1 */
-    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    /* of is defined iff shift count == 1 */
+    of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
-{
-    return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
-}
-
-#if DATA_BITS == 32
-static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
-    return CC_SRC & 1;
+    return (src1 >> (DATA_BITS - 1)) & CC_C;
 }
-#endif

-static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;

-    cf = CC_SRC & 1;
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = src1 & 1;
+    pf = parity_table[(uint8_t)dst];
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    /* of is defined if shift count == 1 */
-    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    /* of is defined iff shift count == 1 */
+    of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }

-#if DATA_BITS == 32
-static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
+/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
+   CF are modified and it is slower to do that. Note as well that we
+   don't truncate SRC1 for computing carry to DATA_TYPE. */
+static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1)
 {
-    int cf;
+    int cf, pf, af, zf, sf, of;

-    cf = (CC_SRC != 0);
-    return cf;
+    cf = (src1 != 0);
+    pf = parity_table[(uint8_t)dst];
+    af = 0; /* undefined */
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = cf * CC_O;
+    return cf | pf | af | zf | sf | of;
 }
-#endif

-/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
-   CF are modified and it is slower to do that. */
-static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;

-    cf = (CC_SRC != 0);
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = (src1 == 0);
+    pf = 0; /* undefined */
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = cf << 11;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = 0;
     return cf | pf | af | zf | sf | of;
 }

+static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    return src1 == 0;
+}
+
 #undef DATA_BITS
 #undef SIGN_MASK
 #undef DATA_TYPE
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index dfcf86e862..5582e5f4e6 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -389,10 +389,15 @@ typedef struct x86_def_t {
           CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
           CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \
-          CPUID_EXT_HYPERVISOR)
+          CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
           /* missing:
-          CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
-          CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
+          CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
+          CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
+          CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
+          CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_SSE41, CPUID_EXT_SSE42,
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
+          CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX,
+          CPUID_EXT_F16C, CPUID_EXT_RDRAND */
 #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
@@ -401,7 +406,12 @@ typedef struct x86_def_t {
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
-#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
+          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
+          /* missing:
+          CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
+          CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
+          CPUID_7_0_EBX_RDSEED */

 /* built-in CPU model definitions */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 7577e4f8bb..493dda8bb6 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -582,7 +582,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];

 #define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3

-enum {
+typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
     CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */

@@ -636,8 +636,19 @@ enum {
     CC_OP_SARL,
     CC_OP_SARQ,

+    CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
+    CC_OP_BMILGW,
+    CC_OP_BMILGL,
+    CC_OP_BMILGQ,
+
+    CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
+    CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
+    CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
+
+    CC_OP_CLR, /* Z set, all other flags clear.  */
+
     CC_OP_NB,
-};
+} CCOp;

 typedef struct SegmentCache {
     uint32_t selector;
@@ -725,8 +736,9 @@ typedef struct CPUX86State {
                                  stored elsewhere */

     /* emulator internal eflags handling */
-    target_ulong cc_src;
     target_ulong cc_dst;
+    target_ulong cc_src;
+    target_ulong cc_src2;
     uint32_t cc_op;
     int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
     uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
@@ -764,7 +776,6 @@ typedef struct CPUX86State {
     XMMReg xmm_regs[CPU_NB_REGS];
     XMMReg xmm_t0;
     MMXReg mmx_t0;
-    target_ulong cc_tmp; /* temporary for rcr/rcl */

     /* sysenter registers */
     uint32_t sysenter_cs;
@@ -1117,9 +1128,10 @@ static inline int cpu_mmu_index (CPUX86State *env)
 #define EIP (env->eip)
 #define DF  (env->df)

-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP  (env->cc_op)
+#define CC_DST  (env->cc_dst)
+#define CC_SRC  (env->cc_src)
+#define CC_SRC2 (env->cc_src2)
+#define CC_OP   (env->cc_op)

 /* n must be a constant to be efficient */
 static inline target_long lshift(target_long x, int n)
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4bf9db7f7d..82a731c77d 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -55,7 +55,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env)
 /***********************************************************/
 /* x86 debug */

-static const char *cc_op_str[] = {
+static const char *cc_op_str[CC_OP_NB] = {
     "DYNAMIC",
     "EFLAGS",

@@ -108,6 +108,17 @@
     "SARW",
     "SARL",
     "SARQ",
+
+    "BMILGB",
+    "BMILGW",
+    "BMILGL",
+    "BMILGQ",
+
+    "ADCX",
+    "ADOX",
+    "ADCOX",
+
+    "CLR",
 };

 static void
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 9ed720d0ed..26a0cc80a4 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
 #include "exec/def-helper.h"

-DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int)
-DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int)
+DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)

 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
@@ -19,6 +19,7 @@ DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl) DEF_HELPER_2(divq_EAX, void, env, tl) DEF_HELPER_2(idivq_EAX, void, env, tl) +DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl) #endif DEF_HELPER_2(aam, void, env, int) @@ -193,9 +194,11 @@ DEF_HELPER_3(fsave, void, env, tl, int) DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_3(fxsave, void, env, tl, int) DEF_HELPER_3(fxrstor, void, env, tl, int) -DEF_HELPER_1(bsf, tl, tl) -DEF_HELPER_1(bsr, tl, tl) -DEF_HELPER_2(lzcnt, tl, tl, int) + +DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 84b812dcca..3b56075a9d 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -385,6 +385,13 @@ void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0) CC_SRC = r1; } +target_ulong helper_umulh(target_ulong t0, target_ulong t1) +{ + uint64_t h, l; + mulu64(&l, &h, t0, t1); + return h; +} + void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0) { uint64_t r0, r1; @@ -440,45 +447,49 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) } #endif +#if TARGET_LONG_BITS == 32 +# define ctztl ctz32 +# define clztl clz32 +#else +# define ctztl ctz64 +# define clztl clz64 +#endif + /* bit operations */ -target_ulong helper_bsf(target_ulong t0) +target_ulong helper_ctz(target_ulong t0) { - int count; - target_ulong res; - - res = t0; - count = 0; - while ((res & 1) == 0) { - count++; - res >>= 1; - } - return count; + return ctztl(t0); } -target_ulong helper_lzcnt(target_ulong t0, int wordsize) +target_ulong helper_clz(target_ulong t0) { - int count; - target_ulong res, mask; + return clztl(t0); +} - if (wordsize > 0 && t0 == 0) { - return wordsize; - } - res = t0; - count = TARGET_LONG_BITS - 1; - mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); - while ((res & mask) == 0) { - count--; - res <<= 1; - } - if (wordsize > 0) { - return wordsize - 1 - count; +target_ulong helper_pdep(target_ulong src, target_ulong mask) +{ + target_ulong dest = 0; + int i, o; + + for (i = 0; mask != 0; i++) { + o = ctztl(mask); + mask &= mask - 1; + dest |= ((src >> i) & 1) << o; } - return count; + return dest; } -target_ulong helper_bsr(target_ulong t0) +target_ulong helper_pext(target_ulong src, target_ulong mask) { - return helper_lzcnt(t0, 0); + target_ulong dest = 0; + int i, o; + + for (o = 0; mask != 0; o++) { + i = ctztl(mask); + mask &= mask - 1; + dest |= ((src >> i) & 1) << o; + } + return dest; } #define SHIFT 0 diff --git a/target-i386/shift_helper_template.h b/target-i386/shift_helper_template.h index dda0da30cf..cf91a2d284 100644 --- a/target-i386/shift_helper_template.h +++ b/target-i386/shift_helper_template.h @@ -55,7 +55,7 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0, count = rclb_table[count]; #endif if (count) { - eflags = helper_cc_compute_all(env, CC_OP); + eflags = env->cc_src; t0 &= DATA_MASK; src = t0; res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); @@ -63,11 +63,9 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0, res |= t0 >> (DATA_BITS + 1 - count); } t0 = res; - env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + env->cc_src = (eflags & ~(CC_C | CC_O)) | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | ((src >> (DATA_BITS - count)) & CC_C); - } else { - env->cc_tmp = -1; } return t0; } @@ -86,7 +84,7 
@@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0, count = rclb_table[count]; #endif if (count) { - eflags = helper_cc_compute_all(env, CC_OP); + eflags = env->cc_src; t0 &= DATA_MASK; src = t0; res = (t0 >> count) | @@ -95,11 +93,9 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0, res |= t0 << (DATA_BITS + 1 - count); } t0 = res; - env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + env->cc_src = (eflags & ~(CC_C | CC_O)) | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | ((src >> (count - 1)) & CC_C); - } else { - env->cc_tmp = -1; } return t0; } diff --git a/target-i386/translate.c b/target-i386/translate.c index 112c3102a0..439d19efe0 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -23,6 +23,7 @@ #include <inttypes.h> #include <signal.h> +#include "qemu/host-utils.h" #include "cpu.h" #include "disas/disas.h" #include "tcg-op.h" @@ -36,6 +37,7 @@ #define PREFIX_LOCK 0x04 #define PREFIX_DATA 0x08 #define PREFIX_ADR 0x10 +#define PREFIX_VEX 0x20 #ifdef TARGET_X86_64 #define CODE64(s) ((s)->code64) @@ -47,21 +49,29 @@ #define REX_B(s) 0 #endif +#ifdef TARGET_X86_64 +# define ctztl ctz64 +# define clztl clz64 +#else +# define ctztl ctz32 +# define clztl clz32 +#endif + //#define MACRO_TEST 1 /* global register indexes */ static TCGv_ptr cpu_env; -static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp; +static TCGv cpu_A0; +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ -static TCGv cpu_T[2], cpu_T3; +static TCGv cpu_T[2]; /* local register indexes (only used inside old micro ops) */ static TCGv cpu_tmp0, cpu_tmp4; static TCGv_ptr cpu_ptr0, cpu_ptr1; static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; static TCGv_i64 cpu_tmp1_i64; -static TCGv cpu_tmp5; static uint8_t gen_opc_cc_op[OPC_BUF_SIZE]; @@ -88,8 +98,11 @@ typedef struct DisasContext { int code64; /* 64 bit code segment */ int rex_x, rex_b; #endif + int vex_l; /* vex vector length */ + int vex_v; /* vex vvvv register, without 1's compliment. */ int ss32; /* 32 bit stack segment */ - int cc_op; /* current CC operation */ + CCOp cc_op; /* current CC operation */ + bool cc_op_dirty; int addseg; /* non zero if either DS/ES/SS have a non zero base */ int f_st; /* currently unused */ int vm86; /* vm86 mode */ @@ -113,6 +126,7 @@ typedef struct DisasContext { static void gen_eob(DisasContext *s); static void gen_jmp(DisasContext *s, target_ulong eip); static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); +static void gen_op(DisasContext *s1, int op, int ot, int d); /* i386 arith/logic operations */ enum { @@ -173,6 +187,79 @@ enum { OR_A0, /* temporary register used when doing address evaluation */ }; +enum { + USES_CC_DST = 1, + USES_CC_SRC = 2, + USES_CC_SRC2 = 4, + USES_CC_SRCT = 8, +}; + +/* Bit set if the global variable is live after setting CC_OP to X. */ +static const uint8_t cc_op_live[CC_OP_NB] = { + [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, + [CC_OP_EFLAGS] = USES_CC_SRC, + [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, + [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT, + [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, + [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST, + [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_DECB ... 
CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, + [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, + [CC_OP_CLR] = 0, +}; + +static void set_cc_op(DisasContext *s, CCOp op) +{ + int dead; + + if (s->cc_op == op) { + return; + } + + /* Discard CC computation that will no longer be used. */ + dead = cc_op_live[s->cc_op] & ~cc_op_live[op]; + if (dead & USES_CC_DST) { + tcg_gen_discard_tl(cpu_cc_dst); + } + if (dead & USES_CC_SRC) { + tcg_gen_discard_tl(cpu_cc_src); + } + if (dead & USES_CC_SRC2) { + tcg_gen_discard_tl(cpu_cc_src2); + } + if (dead & USES_CC_SRCT) { + tcg_gen_discard_tl(cpu_cc_srcT); + } + + if (op == CC_OP_DYNAMIC) { + /* The DYNAMIC setting is translator only, and should never be + stored. Thus we always consider it clean. */ + s->cc_op_dirty = false; + } else { + /* Discard any computed CC_OP value (see shifts). */ + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_discard_i32(cpu_cc_op); + } + s->cc_op_dirty = true; + } + s->cc_op = op; +} + +static void gen_update_cc_op(DisasContext *s) +{ + if (s->cc_op_dirty) { + tcg_gen_movi_i32(cpu_cc_op, s->cc_op); + s->cc_op_dirty = false; + } +} + static inline void gen_op_movl_T0_0(void) { tcg_gen_movi_tl(cpu_T[0], 0); @@ -323,17 +410,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg) static inline void gen_op_mov_reg_A0(int size, int reg) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16); break; default: /* XXX this shouldn't be reached; abort? */ - case 1: + case OT_WORD: /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a mov. */ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_mov_tl(cpu_regs[reg], cpu_A0); break; #endif @@ -398,11 +485,11 @@ static inline void gen_op_jmp_T0(void) static inline void gen_op_add_reg_im(int size, int reg, int32_t val) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; - case 1: + case OT_WORD: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a nop. */ @@ -410,7 +497,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val) tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val); break; #endif @@ -420,11 +507,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val) static inline void gen_op_add_reg_T0(int size, int reg) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; - case 1: + case OT_WORD: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a nop. 
*/ @@ -432,18 +519,13 @@ static inline void gen_op_add_reg_T0(int size, int reg) tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]); break; #endif } } -static inline void gen_op_set_cc_op(int32_t val) -{ - tcg_gen_movi_i32(cpu_cc_op, val); -} - static inline void gen_op_addl_A0_reg_sN(int shift, int reg) { tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]); @@ -506,14 +588,14 @@ static inline void gen_op_lds_T0_A0(int idx) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index); break; default: - case 2: + case OT_LONG: tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index); break; } @@ -523,17 +605,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_ld8u(t0, a0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_ld16u(t0, a0, mem_index); break; - case 2: + case OT_LONG: tcg_gen_qemu_ld32u(t0, a0, mem_index); break; default: - case 3: + case OT_QUAD: /* Should never happen on 32-bit targets. */ #ifdef TARGET_X86_64 tcg_gen_qemu_ld64(t0, a0, mem_index); @@ -562,17 +644,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_st8(t0, a0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_st16(t0, a0, mem_index); break; - case 2: + case OT_LONG: tcg_gen_qemu_st32(t0, a0, mem_index); break; default: - case 3: + case OT_QUAD: /* Should never happen on 32-bit targets. */ #ifdef TARGET_X86_64 tcg_gen_qemu_st64(t0, a0, mem_index); @@ -659,38 +741,45 @@ static inline void gen_op_movl_T0_Dshift(int ot) tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot); }; -static void gen_extu(int ot, TCGv reg) +static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign) { - switch(ot) { + switch (size) { case OT_BYTE: - tcg_gen_ext8u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext8s_tl(dst, src); + } else { + tcg_gen_ext8u_tl(dst, src); + } + return dst; case OT_WORD: - tcg_gen_ext16u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext16s_tl(dst, src); + } else { + tcg_gen_ext16u_tl(dst, src); + } + return dst; +#ifdef TARGET_X86_64 case OT_LONG: - tcg_gen_ext32u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext32s_tl(dst, src); + } else { + tcg_gen_ext32u_tl(dst, src); + } + return dst; +#endif default: - break; + return src; } } +static void gen_extu(int ot, TCGv reg) +{ + gen_ext_tl(reg, reg, ot, false); +} + static void gen_exts(int ot, TCGv reg) { - switch(ot) { - case OT_BYTE: - tcg_gen_ext8s_tl(reg, reg); - break; - case OT_WORD: - tcg_gen_ext16s_tl(reg, reg); - break; - case OT_LONG: - tcg_gen_ext32s_tl(reg, reg); - break; - default: - break; - } + gen_ext_tl(reg, reg, ot, true); } static inline void gen_op_jnz_ecx(int size, int label1) @@ -710,21 +799,31 @@ static inline void gen_op_jz_ecx(int size, int label1) static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n) { switch (ot) { - case 0: gen_helper_inb(v, n); break; - case 1: gen_helper_inw(v, n); break; - case 2: gen_helper_inl(v, n); break; + case OT_BYTE: + gen_helper_inb(v, n); + break; + case OT_WORD: + gen_helper_inw(v, n); + break; + case OT_LONG: + gen_helper_inl(v, n); + break; } - } static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n) { switch (ot) { - case 0: gen_helper_outb(v, n); break; - 
case 1: gen_helper_outw(v, n); break; - case 2: gen_helper_outl(v, n); break; + case OT_BYTE: + gen_helper_outb(v, n); + break; + case OT_WORD: + gen_helper_outw(v, n); + break; + case OT_LONG: + gen_helper_outl(v, n); + break; } - } static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, @@ -735,27 +834,25 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, state_saved = 0; if (s->pe && (s->cpl > s->iopl || s->vm86)) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); state_saved = 1; tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); switch (ot) { - case 0: + case OT_BYTE: gen_helper_check_iob(cpu_env, cpu_tmp2_i32); break; - case 1: + case OT_WORD: gen_helper_check_iow(cpu_env, cpu_tmp2_i32); break; - case 2: + case OT_LONG: gen_helper_check_iol(cpu_env, cpu_tmp2_i32); break; } } if(s->flags & HF_SVMI_MASK) { if (!state_saved) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); } svm_flags |= (1 << (4 + ot)); @@ -778,17 +875,8 @@ static inline void gen_movs(DisasContext *s, int ot) gen_op_add_reg_T0(s->aflag, R_EDI); } -static inline void gen_update_cc_op(DisasContext *s) -{ - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - s->cc_op = CC_OP_DYNAMIC; - } -} - static void gen_op_update1_cc(void) { - tcg_gen_discard_tl(cpu_cc_src); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } @@ -798,338 +886,392 @@ static void gen_op_update2_cc(void) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } -static inline void gen_op_cmpl_T0_T1_cc(void) +static void gen_op_update3_cc(TCGv reg) { + tcg_gen_mov_tl(cpu_cc_src2, reg); tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } static inline void gen_op_testl_T0_T1_cc(void) { - tcg_gen_discard_tl(cpu_cc_src); tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); } static void gen_op_update_neg_cc(void) { - tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_movi_tl(cpu_cc_srcT, 0); } -/* compute eflags.C to reg */ -static void gen_compute_eflags_c(TCGv reg) +/* compute all eflags to cc_src */ +static void gen_compute_eflags(DisasContext *s) { - gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + TCGv zero, dst, src1, src2; + int live, dead; + + if (s->cc_op == CC_OP_EFLAGS) { + return; + } + if (s->cc_op == CC_OP_CLR) { + tcg_gen_movi_tl(cpu_cc_src, CC_Z); + set_cc_op(s, CC_OP_EFLAGS); + return; + } + + TCGV_UNUSED(zero); + dst = cpu_cc_dst; + src1 = cpu_cc_src; + src2 = cpu_cc_src2; + + /* Take care to not read values that are not live. 
*/ + live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; + dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); + if (dead) { + zero = tcg_const_tl(0); + if (dead & USES_CC_DST) { + dst = zero; + } + if (dead & USES_CC_SRC) { + src1 = zero; + } + if (dead & USES_CC_SRC2) { + src2 = zero; + } + } + + gen_update_cc_op(s); + gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op); + set_cc_op(s, CC_OP_EFLAGS); + + if (dead) { + tcg_temp_free(zero); + } } -/* compute all eflags to cc_src */ -static void gen_compute_eflags(TCGv reg) +typedef struct CCPrepare { + TCGCond cond; + TCGv reg; + TCGv reg2; + target_ulong imm; + target_ulong mask; + bool use_reg2; + bool no_setcond; +} CCPrepare; + +/* compute eflags.C to reg */ +static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) +{ + TCGv t0, t1; + int size, shift; + + switch (s->cc_op) { + case CC_OP_SUBB ... CC_OP_SUBQ: + /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_SUBB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + /* If no temporary was used, be careful not to alias t1 and t0. */ + t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg; + tcg_gen_mov_tl(t0, cpu_cc_srcT); + gen_extu(size, t0); + goto add_sub; + + case CC_OP_ADDB ... CC_OP_ADDQ: + /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_ADDB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); + add_sub: + return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0, + .reg2 = t1, .mask = -1, .use_reg2 = true }; + + case CC_OP_LOGICB ... CC_OP_LOGICQ: + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + + case CC_OP_INCB ... CC_OP_INCQ: + case CC_OP_DECB ... CC_OP_DECQ: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = -1, .no_setcond = true }; + + case CC_OP_SHLB ... CC_OP_SHLQ: + /* (CC_SRC >> (DATA_BITS - 1)) & 1 */ + size = s->cc_op - CC_OP_SHLB; + shift = (8 << size) - 1; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = (target_ulong)1 << shift }; + + case CC_OP_MULB ... CC_OP_MULQ: + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = -1 }; + + case CC_OP_BMILGB ... CC_OP_BMILGQ: + size = s->cc_op - CC_OP_BMILGB; + t0 = gen_ext_tl(reg, cpu_cc_src, size, false); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + + case CC_OP_ADCX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst, + .mask = -1, .no_setcond = true }; + + case CC_OP_EFLAGS: + case CC_OP_SARB ... CC_OP_SARQ: + /* CC_SRC & 1 */ + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = CC_C }; + + default: + /* The need to compute only C from CC_OP_DYNAMIC is important + in efficiently implementing e.g. INC at the start of a TB. 
*/ + gen_update_cc_op(s); + gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, + cpu_cc_src2, cpu_cc_op); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = -1, .no_setcond = true }; + } +} + +/* compute eflags.P to reg */ +static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg) { - gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + gen_compute_eflags(s); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_P }; } -static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) +/* compute eflags.S to reg */ +static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - switch(jcc_op) { - case JCC_O: - gen_compute_eflags(cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; - case JCC_B: - gen_compute_eflags_c(cpu_T[0]); - break; - case JCC_Z: - gen_compute_eflags(cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; - case JCC_BE: - gen_compute_eflags(cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; - case JCC_S: - gen_compute_eflags(cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; - case JCC_P: - gen_compute_eflags(cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; - case JCC_L: - gen_compute_eflags(cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; + switch (s->cc_op) { + case CC_OP_DYNAMIC: + gen_compute_eflags(s); + /* FALLTHRU */ + case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_S }; + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: - case JCC_LE: - gen_compute_eflags(cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); - break; + { + int size = (s->cc_op - CC_OP_ADDB) & 3; + TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); + return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 }; + } } } -/* return true if setcc_slow is not needed (WARNING: must be kept in - sync with gen_jcc1) */ -static int is_fast_jcc_case(DisasContext *s, int b) +/* compute eflags.O to reg */ +static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) { - int jcc_op; - jcc_op = (b >> 1) & 7; - switch(s->cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - if (jcc_op == JCC_O || jcc_op == JCC_P) - goto slow_jcc; - break; + switch (s->cc_op) { + case CC_OP_ADOX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, + .mask = -1, .no_setcond = true }; + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + default: + gen_compute_eflags(s); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_O }; + } +} - /* some jumps 
are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - if (jcc_op != JCC_Z && jcc_op != JCC_S) - goto slow_jcc; - break; +/* compute eflags.Z to reg */ +static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) +{ + switch (s->cc_op) { + case CC_OP_DYNAMIC: + gen_compute_eflags(s); + /* FALLTHRU */ + case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_Z }; + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 }; default: - slow_jcc: - return 0; + { + int size = (s->cc_op - CC_OP_ADDB) & 3; + TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + } } - return 1; } -/* generate a conditional jump to label 'l1' according to jump opcode +/* perform a conditional store into register 'reg' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. */ -static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) { int inv, jcc_op, size, cond; + CCPrepare cc; TCGv t0; inv = b & 1; jcc_op = (b >> 1) & 7; - switch(cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - - size = cc_op - CC_OP_SUBB; - switch(jcc_op) { - case JCC_Z: - fast_jcc_z: - switch(size) { - case 0: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff); - t0 = cpu_tmp0; - break; - case 1: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff); - t0 = cpu_tmp0; - break; -#ifdef TARGET_X86_64 - case 2: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff); - t0 = cpu_tmp0; - break; -#endif - default: - t0 = cpu_cc_dst; - break; - } - tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); - break; - case JCC_S: - fast_jcc_s: - switch(size) { - case 0: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; - case 1: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; -#ifdef TARGET_X86_64 - case 2: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; -#endif - default: - tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, - 0, l1); - break; - } - break; - - case JCC_B: - cond = inv ? TCG_COND_GEU : TCG_COND_LTU; - goto fast_jcc_b; + switch (s->cc_op) { + case CC_OP_SUBB ... CC_OP_SUBQ: + /* We optimize relational operators for the cmp/jcc case. */ + size = s->cc_op - CC_OP_SUBB; + switch (jcc_op) { case JCC_BE: - cond = inv ? 
TCG_COND_GTU : TCG_COND_LEU; - fast_jcc_b: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - switch(size) { - case 0: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xff); - break; - case 1: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff); - break; -#ifdef TARGET_X86_64 - case 2: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff); - break; -#endif - default: - t0 = cpu_cc_src; - break; - } - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); + gen_extu(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4, + .reg2 = t0, .mask = -1, .use_reg2 = true }; break; - + case JCC_L: - cond = inv ? TCG_COND_GE : TCG_COND_LT; + cond = TCG_COND_LT; goto fast_jcc_l; case JCC_LE: - cond = inv ? TCG_COND_GT : TCG_COND_LE; + cond = TCG_COND_LE; fast_jcc_l: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - switch(size) { - case 0: - t0 = cpu_tmp0; - tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext8s_tl(t0, cpu_cc_src); - break; - case 1: - t0 = cpu_tmp0; - tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext16s_tl(t0, cpu_cc_src); - break; -#ifdef TARGET_X86_64 - case 2: - t0 = cpu_tmp0; - tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext32s_tl(t0, cpu_cc_src); - break; -#endif - default: - t0 = cpu_cc_src; - break; - } - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); + gen_exts(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); + cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4, + .reg2 = t0, .mask = -1, .use_reg2 = true }; break; - + default: goto slow_jcc; } break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_ADCB: - case CC_OP_ADCW: - case CC_OP_ADCL: - case CC_OP_ADCQ: - - case CC_OP_SBBB: - case CC_OP_SBBW: - case CC_OP_SBBL: - case CC_OP_SBBQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - - case CC_OP_SARB: - case CC_OP_SARW: - case CC_OP_SARL: - case CC_OP_SARQ: - switch(jcc_op) { + + default: + slow_jcc: + /* This actually generates good code for JC, JZ and JS. 
*/ + switch (jcc_op) { + case JCC_O: + cc = gen_prepare_eflags_o(s, reg); + break; + case JCC_B: + cc = gen_prepare_eflags_c(s, reg); + break; case JCC_Z: - size = (cc_op - CC_OP_ADDB) & 3; - goto fast_jcc_z; + cc = gen_prepare_eflags_z(s, reg); + break; + case JCC_BE: + gen_compute_eflags(s); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_Z | CC_C }; + break; case JCC_S: - size = (cc_op - CC_OP_ADDB) & 3; - goto fast_jcc_s; + cc = gen_prepare_eflags_s(s, reg); + break; + case JCC_P: + cc = gen_prepare_eflags_p(s, reg); + break; + case JCC_L: + gen_compute_eflags(s); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = CC_S }; + break; default: - goto slow_jcc; + case JCC_LE: + gen_compute_eflags(s); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = CC_S | CC_Z }; + break; } break; - default: - slow_jcc: - gen_setcc_slow_T0(s, jcc_op); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, - cpu_T[0], 0, l1); - break; + } + + if (inv) { + cc.cond = tcg_invert_cond(cc.cond); + } + return cc; +} + +static void gen_setcc1(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_xori_tl(reg, cc.reg, 1); + } else { + tcg_gen_mov_tl(reg, cc.reg); + } + return; + } + + if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 && + cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) { + tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask)); + tcg_gen_andi_tl(reg, reg, 1); + return; + } + if (cc.mask != -1) { + tcg_gen_andi_tl(reg, cc.reg, cc.mask); + cc.reg = reg; + } + if (cc.use_reg2) { + tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + +static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) +{ + gen_setcc1(s, JCC_B << 1, reg); +} + +/* generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. */ +static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1) +{ + CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]); + + if (cc.mask != -1) { + tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask); + cc.reg = cpu_T[0]; + } + if (cc.use_reg2) { + tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); + } else { + tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1); + } +} + +/* Generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. + A translation block must end soon. 
*/ +static inline void gen_jcc1(DisasContext *s, int b, int l1) +{ + CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]); + + gen_update_cc_op(s); + if (cc.mask != -1) { + tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask); + cc.reg = cpu_T[0]; + } + set_cc_op(s, CC_OP_DYNAMIC); + if (cc.use_reg2) { + tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); + } else { + tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1); } } @@ -1168,21 +1310,19 @@ static inline void gen_lods(DisasContext *s, int ot) static inline void gen_scas(DisasContext *s, int ot) { - gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); gen_string_movl_A0_EDI(s); gen_op_ld_T1_A0(ot + s->mem_index); - gen_op_cmpl_T0_T1_cc(); + gen_op(s, OP_CMPL, ot, R_EAX); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_EDI); } static inline void gen_cmps(DisasContext *s, int ot) { - gen_string_movl_A0_ESI(s); - gen_op_ld_T0_A0(ot + s->mem_index); gen_string_movl_A0_EDI(s); gen_op_ld_T1_A0(ot + s->mem_index); - gen_op_cmpl_T0_T1_cc(); + gen_string_movl_A0_ESI(s); + gen_op(s, OP_CMPL, ot, OR_TMP0); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_ESI); gen_op_add_reg_T0(s->aflag, R_EDI); @@ -1256,8 +1396,8 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ l2 = gen_jz_ecx_string(s, next_eip); \ gen_ ## op(s, ot); \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ - gen_op_set_cc_op(CC_OP_SUBB + ot); \ - gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \ + gen_update_cc_op(s); \ + gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ @@ -1337,38 +1477,26 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) } switch(op) { case OP_ADCL: - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_compute_eflags_c(cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); - tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); - s1->cc_op = CC_OP_DYNAMIC; + gen_op_update3_cc(cpu_tmp4); + set_cc_op(s1, CC_OP_ADCB + ot); break; case OP_SBBL: - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_compute_eflags_c(cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); - tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); - s1->cc_op = CC_OP_DYNAMIC; + gen_op_update3_cc(cpu_tmp4); + set_cc_op(s1, CC_OP_SBBB + ot); break; case OP_ADDL: gen_op_addl_T0_T1(); @@ -1377,16 +1505,17 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update2_cc(); - s1->cc_op = CC_OP_ADDB + ot; + set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update2_cc(); - s1->cc_op = CC_OP_SUBB + ot; + set_cc_op(s1, CC_OP_SUBB + ot); break; default: 
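              /* A sketch of the lazy-flags convention the cases above now
                 follow: OP_SUBL records the first operand in cpu_cc_srcT,
                 the second in cpu_cc_src and the result in cpu_cc_dst, and
                 only sets cc_op; a later flag read rebuilds what it needs.
                 The borrow of a byte-sized SUB, for instance, falls out of
                 the saved operands as (hypothetical helper, not part of
                 this patch):

                     static int sub8_borrow(target_ulong srcT, target_ulong src)
                     {
                         return (uint8_t)srcT < (uint8_t)src;
                     }

                 Most flag writes are never consumed, so deferring the
                 computation this way is the point of the CC_OP_* scheme. */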
case OP_ANDL: @@ -1396,7 +1525,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_ORL: tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); @@ -1405,7 +1534,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_XORL: tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]); @@ -1414,11 +1543,13 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_CMPL: - gen_op_cmpl_T0_T1_cc(); - s1->cc_op = CC_OP_SUBB + ot; + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); + set_cc_op(s1, CC_OP_SUBB + ot); break; } } @@ -1430,36 +1561,71 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_TN_reg(ot, 0, d); else gen_op_ld_T0_A0(ot + s1->mem_index); - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(s1, cpu_cc_src); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); - s1->cc_op = CC_OP_INCB + ot; + set_cc_op(s1, CC_OP_INCB + ot); } else { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1); - s1->cc_op = CC_OP_DECB + ot; + set_cc_op(s1, CC_OP_DECB + ot); } if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - gen_compute_eflags_c(cpu_cc_src); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } -static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, - int is_right, int is_arith) +static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1, + TCGv count, bool is_right) { - target_ulong mask; - int shift_label; - TCGv t0, t1, t2; + TCGv_i32 z32, s32, oldop; + TCGv z_tl; + + /* Store the results into the CC variables. If we know that the + variable must be dead, store unconditionally. Otherwise we'll + need to not disrupt the current contents. */ + z_tl = tcg_const_tl(0); + if (cc_op_live[s->cc_op] & USES_CC_DST) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl, + result, cpu_cc_dst); + } else { + tcg_gen_mov_tl(cpu_cc_dst, result); + } + if (cc_op_live[s->cc_op] & USES_CC_SRC) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl, + shm1, cpu_cc_src); + } else { + tcg_gen_mov_tl(cpu_cc_src, shm1); + } + tcg_temp_free(z_tl); - if (ot == OT_QUAD) { - mask = 0x3f; + /* Get the two potential CC_OP values into temporaries. */ + tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); + if (s->cc_op == CC_OP_DYNAMIC) { + oldop = cpu_cc_op; } else { - mask = 0x1f; + tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op); + oldop = cpu_tmp3_i32; } + /* Conditionally store the CC_OP value. */ + z32 = tcg_const_i32(0); + s32 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(s32, count); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop); + tcg_temp_free_i32(z32); + tcg_temp_free_i32(s32); + + /* The CC_OP value is no longer predictable. */ + set_cc_op(s, CC_OP_DYNAMIC); +} + +static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, + int is_right, int is_arith) +{ + target_ulong mask = (ot == OT_QUAD ? 
0x3f : 0x1f); + /* load */ if (op1 == OR_TMP0) { gen_op_ld_T0_A0(ot + s->mem_index); @@ -1467,25 +1633,22 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_TN_reg(ot, 0, op1); } - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - - tcg_gen_andi_tl(t2, cpu_T[1], mask); + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); + tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1); if (is_right) { if (is_arith) { gen_exts(ot, cpu_T[0]); - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } else { gen_extu(ot, cpu_T[0]); - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } } else { - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } /* store */ @@ -1495,52 +1658,13 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_T0(ot, op1); } - /* update eflags if non zero shift */ - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - } - - tcg_gen_mov_tl(t1, cpu_T[0]); - - shift_label = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label); - - tcg_gen_addi_tl(t2, t2, -1); - tcg_gen_mov_tl(cpu_cc_dst, t1); - - if (is_right) { - if (is_arith) { - tcg_gen_sar_tl(cpu_cc_src, t0, t2); - } else { - tcg_gen_shr_tl(cpu_cc_src, t0, t2); - } - } else { - tcg_gen_shl_tl(cpu_cc_src, t0, t2); - } - - if (is_right) { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); - } else { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); - } - - gen_set_label(shift_label); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ - - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); + gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right); } static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, int is_right, int is_arith) { - int mask; - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + int mask = (ot == OT_QUAD ? 0x3f : 0x1f); /* load */ if (op1 == OR_TMP0) @@ -1576,10 +1700,7 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, if (op2 != 0) { tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - if (is_right) - s->cc_op = CC_OP_SARB + ot; - else - s->cc_op = CC_OP_SHLB + ot; + set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); } } @@ -1591,187 +1712,180 @@ static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2) tcg_gen_shri_tl(ret, arg1, -arg2); } -static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, - int is_right) +static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - target_ulong mask; - int label1, label2, data_bits; - TCGv t0, t1, t2, a0; - - /* XXX: inefficient, but we must use local temps */ - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + target_ulong mask = (ot == OT_QUAD ? 
0x3f : 0x1f); + TCGv_i32 t0, t1; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - tcg_gen_mov_tl(t1, cpu_T[1]); - - tcg_gen_andi_tl(t1, t1, mask); - - /* Must test zero case to avoid using undefined behaviour in TCG - shifts. */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1); - - if (ot <= OT_WORD) - tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); - else - tcg_gen_mov_tl(cpu_tmp0, t1); - - gen_extu(ot, t0); - tcg_gen_mov_tl(t2, t0); + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); - data_bits = 8 << ot; - /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX: - fix TCG definition) */ - if (is_right) { - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0); - tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); - tcg_gen_shl_tl(t0, t0, cpu_tmp0); - } else { - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0); - tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); - tcg_gen_shr_tl(t0, t0, cpu_tmp0); + switch (ot) { + case OT_BYTE: + /* Replicate the 8-bit input so that a 32-bit rotate works. */ + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101); + goto do_long; + case OT_WORD: + /* Replicate the 16-bit input so that a 32-bit rotate works. */ + tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16); + goto do_long; + do_long: +#ifdef TARGET_X86_64 + case OT_LONG: + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + if (is_right) { + tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + } else { + tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + break; +#endif + default: + if (is_right) { + tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + break; } - tcg_gen_or_tl(t0, t0, cpu_tmp4); - gen_set_label(label1); /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); + gen_op_st_T0_A0(ot + s->mem_index); } else { - gen_op_mov_reg_v(ot, op1, t0); + gen_op_mov_reg_T0(ot, op1); } - - /* update eflags */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - - label2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); - - gen_compute_eflags(cpu_cc_src); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); - tcg_gen_xor_tl(cpu_tmp0, t2, t0); - tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + + /* We'll need the flags computed into CC_SRC. */ + gen_compute_eflags(s); + + /* The value that was "rotated out" is now present at the other end + of the word. Compute C into CC_DST and O into CC_SRC2. Note that + since we've computed the flags into CC_SRC, these variables are + currently dead. 
*/ if (is_right) { - tcg_gen_shri_tl(t0, t0, data_bits - 1); + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask); + } else { + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask); + tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1); } - tcg_gen_andi_tl(t0, t0, CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - - gen_set_label(label2); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ - - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); + tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); + + /* Now conditionally store the new CC_OP value. If the shift count + is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live. + Otherwise reuse CC_OP_ADCOX which has the C and O flags split out + exactly as we computed above. */ + t0 = tcg_const_i32(0); + t1 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t1, cpu_T[1]); + tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); + tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0, + cpu_tmp2_i32, cpu_tmp3_i32); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + + /* The CC_OP value is no longer predictable. */ + set_cc_op(s, CC_OP_DYNAMIC); } static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, int is_right) { - int mask; - int data_bits; - TCGv t0, t1, a0; - - /* XXX: inefficient, but we must use local temps */ - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + int mask = (ot == OT_QUAD ? 0x3f : 0x1f); + int shift; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - gen_extu(ot, t0); - tcg_gen_mov_tl(t1, t0); - op2 &= mask; - data_bits = 8 << ot; if (op2 != 0) { - int shift = op2 & ((1 << (3 + ot)) - 1); - if (is_right) { - tcg_gen_shri_tl(cpu_tmp4, t0, shift); - tcg_gen_shli_tl(t0, t0, data_bits - shift); - } - else { - tcg_gen_shli_tl(cpu_tmp4, t0, shift); - tcg_gen_shri_tl(t0, t0, data_bits - shift); + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + if (is_right) { + tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + } else { + tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + break; +#endif + default: + if (is_right) { + tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2); + } else { + tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2); + } + break; + case OT_BYTE: + mask = 7; + goto do_shifts; + case OT_WORD: + mask = 15; + do_shifts: + shift = op2 & mask; + if (is_right) { + shift = mask + 1 - shift; + } + gen_extu(ot, cpu_T[0]); + tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + break; } - tcg_gen_or_tl(t0, t0, cpu_tmp4); } /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); + gen_op_st_T0_A0(ot + s->mem_index); } else { - gen_op_mov_reg_v(ot, op1, t0); + gen_op_mov_reg_T0(ot, op1); } if (op2 != 0) { - /* update eflags */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - - gen_compute_eflags(cpu_cc_src); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); - tcg_gen_xor_tl(cpu_tmp0, 
t1, t0); - tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + /* Compute the flags into CC_SRC. */ + gen_compute_eflags(s); + + /* The value that was "rotated out" is now present at the other end + of the word. Compute C into CC_DST and O into CC_SRC2. Note that + since we've computed the flags into CC_SRC, these variables are + currently dead. */ if (is_right) { - tcg_gen_shri_tl(t0, t0, data_bits - 1); + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask); + } else { + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask); + tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1); } - tcg_gen_andi_tl(t0, t0, CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - s->cc_op = CC_OP_EFLAGS; + tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); + tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); + set_cc_op(s, CC_OP_ADCOX); } - - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(a0); } /* XXX: add faster immediate = 1 case */ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - int label1; - - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(s); + assert(s->cc_op == CC_OP_EFLAGS); /* load */ if (op1 == OR_TMP0) @@ -1781,34 +1895,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, if (is_right) { switch (ot) { - case 0: + case OT_BYTE: gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 1: + case OT_WORD: gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 2: + case OT_LONG: gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #ifdef TARGET_X86_64 - case 3: + case OT_QUAD: gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #endif } } else { switch (ot) { - case 0: + case OT_BYTE: gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 1: + case OT_WORD: gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 2: + case OT_LONG: gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #ifdef TARGET_X86_64 - case 3: + case OT_QUAD: gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #endif @@ -1819,146 +1933,92 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, gen_op_st_T0_A0(ot + s->mem_index); else gen_op_mov_reg_T0(ot, op1); - - /* update eflags */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); - - tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp); - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - - gen_set_label(label1); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, - int is_right) +static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1, + bool is_right, TCGv count_in) { - int label1, label2, data_bits; - target_ulong mask; - TCGv t0, t1, t2, a0; - - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + target_ulong mask = (ot == OT_QUAD ? 
63 : 31); + TCGv count; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - tcg_gen_andi_tl(cpu_T3, cpu_T3, mask); - - tcg_gen_mov_tl(t1, cpu_T[1]); - tcg_gen_mov_tl(t2, cpu_T3); + count = tcg_temp_new(); + tcg_gen_andi_tl(count, count_in, mask); - /* Must test zero case to avoid using undefined behaviour in TCG - shifts. */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); - - tcg_gen_addi_tl(cpu_tmp5, t2, -1); - if (ot == OT_WORD) { - /* Note: we implement the Intel behaviour for shift count > 16 */ + switch (ot) { + case OT_WORD: + /* Note: we implement the Intel behaviour for shift count > 16. + This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A + portion by constructing it as a 32-bit value. */ if (is_right) { - tcg_gen_andi_tl(t0, t0, 0xffff); - tcg_gen_shli_tl(cpu_tmp0, t1, 16); - tcg_gen_or_tl(t0, t0, cpu_tmp0); - tcg_gen_ext32u_tl(t0, t0); - - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); - - /* only needed if count > 16, but a test would complicate */ - tcg_gen_subfi_tl(cpu_tmp5, 32, t2); - tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5); - - tcg_gen_shr_tl(t0, t0, t2); - - tcg_gen_or_tl(t0, t0, cpu_tmp0); + tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16); + tcg_gen_mov_tl(cpu_T[1], cpu_T[0]); + tcg_gen_mov_tl(cpu_T[0], cpu_tmp0); } else { - /* XXX: not optimal */ - tcg_gen_andi_tl(t0, t0, 0xffff); - tcg_gen_shli_tl(t1, t1, 16); - tcg_gen_or_tl(t1, t1, t0); - tcg_gen_ext32u_tl(t1, t1); - - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); - tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5); - tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0); - tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5); - - tcg_gen_shl_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, 32, t2); - tcg_gen_shr_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); + tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16); } - } else { - data_bits = 8 << ot; + /* FALLTHRU */ +#ifdef TARGET_X86_64 + case OT_LONG: + /* Concatenate the two 32-bit values and use a 64-bit shift. */ + tcg_gen_subi_tl(cpu_tmp0, count, 1); if (is_right) { - if (ot == OT_LONG) - tcg_gen_ext32u_tl(t0, t0); - - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count); + } else { + tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]); + tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count); + tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32); + tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32); + } + break; +#endif + default: + tcg_gen_subi_tl(cpu_tmp0, count, 1); + if (is_right) { + tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); - tcg_gen_shr_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); - tcg_gen_shl_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); - + tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count); + tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4); } else { - if (ot == OT_LONG) - tcg_gen_ext32u_tl(t1, t1); - - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); - - tcg_gen_shl_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); - tcg_gen_shr_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); + tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + if (ot == OT_WORD) { + /* Only needed if count > 16, for Intel behaviour. 
*/ + tcg_gen_subfi_tl(cpu_tmp4, 33, count); + tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4); + tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4); + } + + tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count); + tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4); } + tcg_gen_movi_tl(cpu_tmp4, 0); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4, + cpu_tmp4, cpu_T[1]); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + break; } - tcg_gen_mov_tl(t1, cpu_tmp4); - gen_set_label(label1); /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); - } else { - gen_op_mov_reg_v(ot, op1, t0); - } - - /* update eflags */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - - label2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); - - tcg_gen_mov_tl(cpu_cc_src, t1); - tcg_gen_mov_tl(cpu_cc_dst, t0); - if (is_right) { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + gen_op_st_T0_A0(ot + s->mem_index); } else { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + gen_op_mov_reg_T0(ot, op1); } - gen_set_label(label2); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right); + tcg_temp_free(count); } static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) @@ -2362,24 +2422,21 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) static inline void gen_jcc(DisasContext *s, int b, target_ulong val, target_ulong next_eip) { - int l1, l2, cc_op; + int l1, l2; - cc_op = s->cc_op; - gen_update_cc_op(s); if (s->jmp_opt) { l1 = gen_new_label(); - gen_jcc1(s, cc_op, b, l1); - + gen_jcc1(s, b, l1); + gen_goto_tb(s, 0, next_eip); gen_set_label(l1); gen_goto_tb(s, 1, val); s->is_jmp = DISAS_TB_JUMP; } else { - l1 = gen_new_label(); l2 = gen_new_label(); - gen_jcc1(s, cc_op, b, l1); + gen_jcc1(s, b, l1); gen_jmp_im(next_eip); tcg_gen_br(l2); @@ -2391,32 +2448,32 @@ static inline void gen_jcc(DisasContext *s, int b, } } -static void gen_setcc(DisasContext *s, int b) +static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b, + int modrm, int reg) { - int inv, jcc_op, l1; - TCGv t0; + CCPrepare cc; - if (is_fast_jcc_case(s, b)) { - /* nominal case: we use a jump */ - /* XXX: make it faster by adding new instructions in TCG */ - t0 = tcg_temp_local_new(); - tcg_gen_movi_tl(t0, 0); - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - tcg_gen_movi_tl(t0, 1); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_T[0], t0); - tcg_temp_free(t0); - } else { - /* slow case: it is more efficient not to generate a jump, - although it is questionnable whether this optimization is - worth to */ - inv = b & 1; - jcc_op = (b >> 1) & 7; - gen_setcc_slow_T0(s, jcc_op); - if (inv) { - tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); - } + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + cc = gen_prepare_cc(s, b, cpu_T[1]); + if (cc.mask != -1) { + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, cc.reg, cc.mask); + cc.reg = t0; + } + if (!cc.use_reg2) { + cc.reg2 = tcg_const_tl(cc.imm); + } + + tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2, + cpu_T[0], cpu_regs[reg]); + gen_op_mov_reg_T0(ot, reg); + + if (cc.mask != -1) { + tcg_temp_free(cc.reg); + } + if (!cc.use_reg2) { + tcg_temp_free(cc.reg2); } } @@ -2442,8 +2499,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip) { if (s->pe && !s->vm86) { /* XXX: optimize by finding 
processor state dynamically */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32); @@ -2472,8 +2528,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, /* no SVM activated; fast case */ if (likely(!(s->flags & HF_SVMI_MASK))) return; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type), tcg_const_i64(param)); @@ -2720,8 +2775,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno)); s->is_jmp = DISAS_TB_JUMP; @@ -2732,8 +2786,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) static void gen_interrupt(DisasContext *s, int intno, target_ulong cur_eip, target_ulong next_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno), tcg_const_i32(next_eip - cur_eip)); @@ -2742,8 +2795,7 @@ static void gen_interrupt(DisasContext *s, int intno, static void gen_debug(DisasContext *s, target_ulong cur_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_debug(cpu_env); s->is_jmp = DISAS_TB_JUMP; @@ -2753,8 +2805,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip) if needed */ static void gen_eob(DisasContext *s) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (s->tb->flags & HF_INHIBIT_IRQ_MASK) { gen_helper_reset_inhibit_irq(cpu_env); } @@ -2775,8 +2826,9 @@ static void gen_eob(DisasContext *s) direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) { + gen_update_cc_op(s); + set_cc_op(s, CC_OP_DYNAMIC); if (s->jmp_opt) { - gen_update_cc_op(s); gen_goto_tb(s, tb_num, eip); s->is_jmp = DISAS_TB_JUMP; } else { @@ -2912,8 +2964,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = { [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps, (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */ - [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */ - [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */ + /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. 
*/ + [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, + [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* MMX ops and their SSE extensions */ [0x60] = MMX_OP2(punpcklbw), @@ -3794,11 +3847,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; gen_op_mov_reg_T0(OT_LONG, reg); break; + case 0x138: - if (s->prefix & PREFIX_REPNZ) - goto crc32; case 0x038: b = modrm; + if ((b & 0xf0) == 0xf0) { + goto do_0f_38_fx; + } modrm = cpu_ldub_code(env, s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; @@ -3867,39 +3922,420 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); - if (b == 0x17) - s->cc_op = CC_OP_EFLAGS; + if (b == 0x17) { + set_cc_op(s, CC_OP_EFLAGS); + } break; - case 0x338: /* crc32 */ - crc32: - b = modrm; + + case 0x238: + case 0x338: + do_0f_38_fx: + /* Various integer extensions at 0f 38 f[0-f]. */ + b = modrm | (b1 << 8); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; - if (b != 0xf0 && b != 0xf1) - goto illegal_op; - if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) - goto illegal_op; + switch (b) { + case 0x3f0: /* crc32 Gd,Eb */ + case 0x3f1: /* crc32 Gd,Ey */ + do_crc32: + if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) { + goto illegal_op; + } + if ((b & 0xff) == 0xf0) { + ot = OT_BYTE; + } else if (s->dflag != 2) { + ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG); + } else { + ot = OT_QUAD; + } - if (b == 0xf0) - ot = OT_BYTE; - else if (b == 0xf1 && s->dflag != 2) - if (s->prefix & PREFIX_DATA) - ot = OT_WORD; - else - ot = OT_LONG; - else - ot = OT_QUAD; + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, + cpu_T[0], tcg_const_i32(8 << ot)); - gen_op_mov_TN_reg(OT_LONG, 0, reg); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, - cpu_T[0], tcg_const_i32(8 << ot)); + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + gen_op_mov_reg_T0(ot, reg); + break; - ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; - gen_op_mov_reg_T0(ot, reg); + case 0x1f0: /* crc32 or movbe */ + case 0x1f1: + /* For these insns, the f3 prefix is supposed to have priority + over the 66 prefix, but that's not what we implement above + setting b1. */ + if (s->prefix & PREFIX_REPNZ) { + goto do_crc32; + } + /* FALLTHRU */ + case 0x0f0: /* movbe Gy,My */ + case 0x0f1: /* movbe My,Gy */ + if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) { + goto illegal_op; + } + if (s->dflag != 2) { + ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG); + } else { + ot = OT_QUAD; + } + + /* Load the data incoming to the bswap. Note that the TCG + implementation of bswap requires the input be zero + extended. In the case of the loads, we simply know that + gen_op_ld_v via gen_ldst_modrm does that already. 
*/ + if ((b & 1) == 0) { + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + } else { + switch (ot) { + case OT_WORD: + tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]); + break; + default: + tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]); + break; + case OT_QUAD: + tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]); + break; + } + } + + switch (ot) { + case OT_WORD: + tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]); + break; + default: + tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]); + break; +#endif + } + + if ((b & 1) == 0) { + gen_op_mov_reg_T0(ot, reg); + } else { + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + } + break; + + case 0x0f2: /* andn Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]); + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_LOGICB + ot); + break; + + case 0x0f7: /* bextr Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + { + TCGv bound, zero; + + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Extract START, and shift the operand. + Shifts larger than operand size get zeros. */ + tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0); + + bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31); + zero = tcg_const_tl(0); + tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound, + cpu_T[0], zero); + tcg_temp_free(zero); + + /* Extract the LEN into a mask. Lengths larger than + operand size get all ones. */ + tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8); + tcg_gen_ext8u_tl(cpu_A0, cpu_A0); + tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound, + cpu_A0, bound); + tcg_temp_free(bound); + tcg_gen_movi_tl(cpu_T[1], 1); + tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0); + tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_LOGICB + ot); + } + break; + + case 0x0f5: /* bzhi Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]); + { + TCGv bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31); + /* Note that since we're using BMILG (in order to get O + cleared) we need to store the inverse into C. */ + tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, + cpu_T[1], bound); + tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1], + bound, bound, cpu_T[1]); + tcg_temp_free(bound); + } + tcg_gen_movi_tl(cpu_A0, -1); + tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]); + tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0); + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + case 0x3f6: /* mulx By, Gy, rdx, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + switch (ot) { + TCGv_i64 t0, t1; + default: + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); +#ifdef TARGET_X86_64 + tcg_gen_ext32u_i64(t0, cpu_T[0]); + tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]); +#else + tcg_gen_extu_i32_i64(t0, cpu_T[0]); + tcg_gen_extu_i32_i64(t1, cpu_regs[R_EDX]); +#endif + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_tl(cpu_T[0], t0); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_tl(cpu_T[1], t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + gen_op_mov_reg_T0(OT_LONG, s->vex_v); + gen_op_mov_reg_T1(OT_LONG, reg); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]); + tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]); + gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; +#endif + } + break; + + case 0x3f5: /* pdep Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Note that by zero-extending the mask operand, we + automatically handle zero-extending the result. */ + if (s->dflag == 2) { + tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]); + } else { + tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]); + } + gen_helper_pdep(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; + + case 0x2f5: /* pext Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Note that by zero-extending the mask operand, we + automatically handle zero-extending the result. */ + if (s->dflag == 2) { + tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]); + } else { + tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]); + } + gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; + + case 0x1f6: /* adcx Gy, Ey */ + case 0x2f6: /* adox Gy, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) { + goto illegal_op; + } else { + TCGv carry_in, carry_out; + int end_op; + + ot = (s->dflag == 2 ? OT_QUAD : OT_LONG); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + /* Re-use the carry-out from a previous round. */ + TCGV_UNUSED(carry_in); + carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2); + switch (s->cc_op) { + case CC_OP_ADCX: + if (b == 0x1f6) { + carry_in = cpu_cc_dst; + end_op = CC_OP_ADCX; + } else { + end_op = CC_OP_ADCOX; + } + break; + case CC_OP_ADOX: + if (b == 0x1f6) { + end_op = CC_OP_ADCOX; + } else { + carry_in = cpu_cc_src2; + end_op = CC_OP_ADOX; + } + break; + case CC_OP_ADCOX: + end_op = CC_OP_ADCOX; + carry_in = carry_out; + break; + default: + end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX); + break; + } + /* If we can't reuse carry-out, get it out of EFLAGS. */ + if (TCGV_IS_UNUSED(carry_in)) { + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } + carry_in = cpu_tmp0; + tcg_gen_shri_tl(carry_in, cpu_cc_src, + ctz32(b == 0x1f6 ? CC_C : CC_O)); + tcg_gen_andi_tl(carry_in, carry_in, 1); + } + + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + /* If we know TL is 64-bit, and we want a 32-bit + result, just do everything in 64-bit arithmetic. 
*/ + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]); + tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in); + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]); + tcg_gen_shri_i64(carry_out, cpu_T[0], 32); + break; +#endif + default: + /* Otherwise compute the carry-out in two steps. */ + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4, + cpu_T[0], cpu_regs[reg]); + tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in); + tcg_gen_setcond_tl(TCG_COND_LTU, carry_out, + cpu_regs[reg], cpu_T[0]); + tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4); + break; + } + /* We began with all flags computed to CC_SRC, and we + have now placed the carry-out in CC_DST. All that + is left is to record the CC_OP. */ + set_cc_op(s, end_op); + } + break; + + case 0x1f7: /* shlx Gy, Ey, By */ + case 0x2f7: /* sarx Gy, Ey, By */ + case 0x3f7: /* shrx Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = (s->dflag == 2 ? OT_QUAD : OT_LONG); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + if (ot == OT_QUAD) { + tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63); + } else { + tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31); + } + if (b == 0x1f7) { + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else if (b == 0x2f7) { + if (ot != OT_QUAD) { + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + } + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + if (ot != OT_QUAD) { + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + } + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + gen_op_mov_reg_T0(ot, reg); + break; + + case 0x0f3: + case 0x1f3: + case 0x2f3: + case 0x3f3: /* Group 17 */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + switch (reg & 7) { + case 1: /* blsr By,Ey */ + tcg_gen_neg_tl(cpu_T[1], cpu_T[0]); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(ot, s->vex_v); + gen_op_update2_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + case 2: /* blsmsk By,Ey */ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + case 3: /* blsi By, Ey */ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + default: + goto illegal_op; + } + break; + + default: + goto illegal_op; + } break; + case 0x03a: case 0x13a: b = modrm; @@ -4070,7 +4506,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val = cpu_ldub_code(env, s->pc++); if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); if (s->dflag == 2) /* The helper must use entire 64-bit gp registers */ @@ -4081,6 +4517,38 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); break; + + case 0x33a: + /* Various integer extensions at 0f 3a f[0-f]. 
*/ + b = modrm | (b1 << 8); + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + switch (b) { + case 0x3f0: /* rorx Gy,Ey, Ib */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + b = cpu_ldub_code(env, s->pc++); + if (ot == OT_QUAD) { + tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63); + } else { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + } + gen_op_mov_reg_T0(ot, reg); + break; + + default: + goto illegal_op; + } + break; + default: goto illegal_op; } @@ -4191,7 +4659,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; } if (b == 0x2e || b == 0x2f) { - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } } } @@ -4223,47 +4691,49 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, x86_64_hregs = 0; #endif s->rip_offset = 0; /* for relative ip address */ + s->vex_l = 0; + s->vex_v = 0; next_byte: b = cpu_ldub_code(env, s->pc); s->pc++; - /* check prefixes */ + /* Collect prefixes. */ + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; #ifdef TARGET_X86_64 - if (CODE64(s)) { - switch (b) { - case 0xf3: - prefixes |= PREFIX_REPZ; - goto next_byte; - case 0xf2: - prefixes |= PREFIX_REPNZ; - goto next_byte; - case 0xf0: - prefixes |= PREFIX_LOCK; - goto next_byte; - case 0x2e: - s->override = R_CS; - goto next_byte; - case 0x36: - s->override = R_SS; - goto next_byte; - case 0x3e: - s->override = R_DS; - goto next_byte; - case 0x26: - s->override = R_ES; - goto next_byte; - case 0x64: - s->override = R_FS; - goto next_byte; - case 0x65: - s->override = R_GS; - goto next_byte; - case 0x66: - prefixes |= PREFIX_DATA; - goto next_byte; - case 0x67: - prefixes |= PREFIX_ADR; - goto next_byte; - case 0x40 ... 0x4f: + case 0x40 ... 0x4f: + if (CODE64(s)) { /* REX prefix */ rex_w = (b >> 3) & 1; rex_r = (b & 0x4) << 1; @@ -4272,58 +4742,85 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, x86_64_hregs = 1; /* select uniform byte register addressing */ goto next_byte; } + break; +#endif + case 0xc5: /* 2-byte VEX */ + case 0xc4: /* 3-byte VEX */ + /* VEX prefixes cannot be used except in 32-bit mode. + Otherwise the instruction is LES or LDS. */ + if (s->code32 && !s->vm86) { + static const int pp_prefix[4] = { + 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ + }; + int vex3, vex2 = cpu_ldub_code(env, s->pc); + + if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { + /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, + otherwise the instruction is LES or LDS. */ + break; + } + s->pc++; + + /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. 
*/ + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ + | PREFIX_LOCK | PREFIX_DATA)) { + goto illegal_op; + } +#ifdef TARGET_X86_64 + if (x86_64_hregs) { + goto illegal_op; + } +#endif + rex_r = (~vex2 >> 4) & 8; + if (b == 0xc5) { + vex3 = vex2; + b = cpu_ldub_code(env, s->pc++); + } else { +#ifdef TARGET_X86_64 + s->rex_x = (~vex2 >> 3) & 8; + s->rex_b = (~vex2 >> 2) & 8; +#endif + vex3 = cpu_ldub_code(env, s->pc++); + rex_w = (vex3 >> 7) & 1; + switch (vex2 & 0x1f) { + case 0x01: /* Implied 0f leading opcode bytes. */ + b = cpu_ldub_code(env, s->pc++) | 0x100; + break; + case 0x02: /* Implied 0f 38 leading opcode bytes. */ + b = 0x138; + break; + case 0x03: /* Implied 0f 3a leading opcode bytes. */ + b = 0x13a; + break; + default: /* Reserved for future use. */ + goto illegal_op; + } + } + s->vex_v = (~vex3 >> 3) & 0xf; + s->vex_l = (vex3 >> 2) & 1; + prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX; + } + break; + } + + /* Post-process prefixes. */ + if (prefixes & PREFIX_DATA) { + dflag ^= 1; + } + if (prefixes & PREFIX_ADR) { + aflag ^= 1; + } +#ifdef TARGET_X86_64 + if (CODE64(s)) { if (rex_w == 1) { /* 0x66 is ignored if rex.w is set */ dflag = 2; - } else { - if (prefixes & PREFIX_DATA) - dflag ^= 1; } - if (!(prefixes & PREFIX_ADR)) + if (!(prefixes & PREFIX_ADR)) { aflag = 2; - } else -#endif - { - switch (b) { - case 0xf3: - prefixes |= PREFIX_REPZ; - goto next_byte; - case 0xf2: - prefixes |= PREFIX_REPNZ; - goto next_byte; - case 0xf0: - prefixes |= PREFIX_LOCK; - goto next_byte; - case 0x2e: - s->override = R_CS; - goto next_byte; - case 0x36: - s->override = R_SS; - goto next_byte; - case 0x3e: - s->override = R_DS; - goto next_byte; - case 0x26: - s->override = R_ES; - goto next_byte; - case 0x64: - s->override = R_FS; - goto next_byte; - case 0x65: - s->override = R_GS; - goto next_byte; - case 0x66: - prefixes |= PREFIX_DATA; - goto next_byte; - case 0x67: - prefixes |= PREFIX_ADR; - goto next_byte; } - if (prefixes & PREFIX_DATA) - dflag ^= 1; - if (prefixes & PREFIX_ADR) - aflag ^= 1; } +#endif s->prefix = prefixes; s->aflag = aflag; @@ -4374,10 +4871,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else if (op == OP_XORL && rm == reg) { xor_zero: /* xor reg, reg optimisation */ + set_cc_op(s, CC_OP_CLR); gen_op_movl_T0_0(); - s->cc_op = CC_OP_LOGICB + ot; gen_op_mov_reg_T0(ot, reg); - gen_op_update1_cc(); break; } else { opreg = rm; @@ -4490,7 +4986,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, val = insn_get(env, s, ot); gen_op_movl_T1_im(val); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ tcg_gen_not_tl(cpu_T[0], cpu_T[0]); @@ -4508,7 +5004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T0(ot, rm); } gen_op_update_neg_cc(); - s->cc_op = CC_OP_SUBB + ot; + set_cc_op(s, CC_OP_SUBB + ot); break; case 4: /* mul */ switch(ot) { @@ -4521,7 +5017,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T0(OT_WORD, R_EAX); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); - s->cc_op = CC_OP_MULB; + set_cc_op(s, CC_OP_MULB); break; case OT_WORD: gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); @@ -4534,7 +5030,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); gen_op_mov_reg_T0(OT_WORD, R_EDX); tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); - s->cc_op = CC_OP_MULW; + set_cc_op(s, CC_OP_MULW); break; default: case OT_LONG: @@ 
-4566,12 +5062,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); } #endif - s->cc_op = CC_OP_MULL; + set_cc_op(s, CC_OP_MULL); break; #ifdef TARGET_X86_64 case OT_QUAD: gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]); - s->cc_op = CC_OP_MULQ; + set_cc_op(s, CC_OP_MULQ); break; #endif } @@ -4588,7 +5084,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); - s->cc_op = CC_OP_MULB; + set_cc_op(s, CC_OP_MULB); break; case OT_WORD: gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); @@ -4602,7 +5098,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); gen_op_mov_reg_T0(OT_WORD, R_EDX); - s->cc_op = CC_OP_MULW; + set_cc_op(s, CC_OP_MULW); break; default: case OT_LONG: @@ -4636,12 +5132,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); } #endif - s->cc_op = CC_OP_MULL; + set_cc_op(s, CC_OP_MULL); break; #ifdef TARGET_X86_64 case OT_QUAD: gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]); - s->cc_op = CC_OP_MULQ; + set_cc_op(s, CC_OP_MULQ); break; #endif } @@ -4761,8 +5257,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_ldu_T0_A0(OT_WORD + s->mem_index); do_lcall: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1], @@ -4788,8 +5283,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_ldu_T0_A0(OT_WORD + s->mem_index); do_ljmp: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1], @@ -4822,7 +5316,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_op_mov_TN_reg(ot, 1, reg); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 0xa8: /* test eAX, Iv */ @@ -4836,7 +5330,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_TN_reg(ot, 0, OR_EAX); gen_op_movl_T1_im(val); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 0x98: /* CWDE/CBW */ @@ -4937,7 +5431,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); } gen_op_mov_reg_T0(ot, reg); - s->cc_op = CC_OP_MULB + ot; + set_cc_op(s, CC_OP_MULB + ot); break; case 0x1c0: case 0x1c1: /* xadd Ev, Gv */ @@ -4964,7 +5458,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T1(ot, reg); } gen_op_update2_cc(); - s->cc_op = CC_OP_ADDB + ot; + set_cc_op(s, CC_OP_ADDB + ot); break; case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ @@ -4994,9 +5488,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = 0; /* avoid warning */ } label1 = gen_new_label(); - tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0); + tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); + gen_extu(ot, t0); gen_extu(ot, t2); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, 
label1); label2 = gen_new_label(); if (mod == 3) { gen_op_mov_reg_v(ot, R_EAX, t0); @@ -5015,8 +5510,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_set_label(label2); tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_dst, t2); - s->cc_op = CC_OP_SUBB + ot; + tcg_gen_mov_tl(cpu_cc_srcT, t2); + tcg_gen_sub_tl(cpu_cc_dst, t2, t0); + set_cc_op(s, CC_OP_SUBB + ot); tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); @@ -5033,8 +5529,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_jmp_im(pc_start - s->cs_base); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_cmpxchg16b(cpu_env, cpu_A0); } else @@ -5043,12 +5538,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_jmp_im(pc_start - s->cs_base); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_cmpxchg8b(cpu_env, cpu_A0); } - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; /**************************/ @@ -5452,13 +5946,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0xc4: /* les Gv */ - if (CODE64(s)) - goto illegal_op; + /* In CODE64 this is VEX3; see above. */ op = R_ES; goto do_lxx; case 0xc5: /* lds Gv */ - if (CODE64(s)) - goto illegal_op; + /* In CODE64 this is VEX2; see above. */ op = R_DS; goto do_lxx; case 0x1b2: /* lss Gv */ @@ -5569,12 +6061,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_TN_reg(ot, 1, reg); if (shift) { - val = cpu_ldub_code(env, s->pc++); - tcg_gen_movi_tl(cpu_T3, val); + TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++)); + gen_shiftd_rm_T1(s, ot, opreg, op, imm); + tcg_temp_free(imm); } else { - tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]); + gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]); } - gen_shiftd_rm_T1_T3(s, ot, opreg, op); break; /************************/ @@ -5717,8 +6209,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x0c: /* fldenv mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5728,8 +6219,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fldcw(cpu_env, cpu_tmp2_i32); break; case 0x0e: /* fnstenv mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5739,27 +6229,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_T0_A0(OT_WORD + s->mem_index); break; case 0x1d: /* fldt mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fldt_ST0(cpu_env, cpu_A0); break; case 0x1f: /* fstpt mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fstt_ST0(cpu_env, cpu_A0); gen_helper_fpop(cpu_env); break; case 0x2c: /* frstor mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); 
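                    /* This three-step shape recurs before every helper in
                       this patch that can raise an exception: flush the
                       lazily-tracked cc_op, synchronize EIP, then call out.
                       A minimal sketch of what the flush amounts to, based
                       on the open-coded test it replaces (the real
                       gen_update_cc_op may additionally track dirtiness):

                           static void gen_update_cc_op(DisasContext *s)
                           {
                               if (s->cc_op != CC_OP_DYNAMIC) {
                                   tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
                               }
                           }

                       so helpers such as frstor below always observe a
                       consistent cc_op in the CPU state. */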
gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; case 0x2e: /* fnsave mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5769,14 +6255,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_T0_A0(OT_WORD + s->mem_index); break; case 0x3c: /* fbld */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fbld_ST0(cpu_env, cpu_A0); break; case 0x3e: /* fbstp */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fbst_ST0(cpu_env, cpu_A0); gen_helper_fpop(cpu_env); @@ -5814,8 +6298,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, switch(rm) { case 0: /* fnop */ /* check exceptions (FreeBSD FPU probe) */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fwait(cpu_env); break; @@ -5996,18 +6479,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x1d: /* fucomi */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x1e: /* fcomi */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x28: /* ffree sti */ gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); @@ -6059,20 +6540,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x3d: /* fucomip */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x3e: /* fcomip */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x10 ... 0x13: /* fcmovxx */ case 0x18 ... 
0x1b: @@ -6086,7 +6565,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, }; op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, op1, l1); + gen_jcc1_noeob(s, op1, l1); gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg)); gen_set_label(l1); } @@ -6150,7 +6629,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); } else { gen_scas(s, ot); - s->cc_op = CC_OP_SUBB + ot; } break; @@ -6166,7 +6644,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); } else { gen_cmps(s, ot); - s->cc_op = CC_OP_SUBB + ot; } break; case 0x6c: /* insS */ @@ -6323,8 +6800,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->pc += 2; do_lret: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag), tcg_const_i32(val)); @@ -6354,21 +6830,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!s->pe) { /* real mode */ gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } else if (s->vm86) { if (s->iopl != 3) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag), tcg_const_i32(s->pc - s->cs_base)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } gen_eob(s); break; @@ -6455,44 +6930,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x190 ... 0x19f: /* setcc Gv */ modrm = cpu_ldub_code(env, s->pc++); - gen_setcc(s, b); + gen_setcc1(s, b, cpu_T[0]); gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1); break; case 0x140 ... 0x14f: /* cmov Gv, Ev */ - { - int l1; - TCGv t0; - - ot = dflag + OT_WORD; - modrm = cpu_ldub_code(env, s->pc++); - reg = ((modrm >> 3) & 7) | rex_r; - mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - if (mod != 3) { - gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } -#ifdef TARGET_X86_64 - if (ot == OT_LONG) { - /* XXX: specific Intel behaviour ? 
*/ - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - tcg_gen_mov_tl(cpu_regs[reg], t0); - gen_set_label(l1); - tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); - } else -#endif - { - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - gen_op_mov_reg_v(ot, reg, t0); - gen_set_label(l1); - } - tcg_temp_free(t0); - } + ot = dflag + OT_WORD; + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_cmovcc1(env, s, ot, b, modrm, reg); break; /************************/ @@ -6502,8 +6947,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->vm86 && s->iopl != 3) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_read_eflags(cpu_T[0], cpu_env); gen_push_T0(s); } @@ -6560,7 +7004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } } gen_pop_update(s); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); /* abort translation because TF/AC flag may change */ gen_jmp_im(s->pc - s->cs_base); gen_eob(s); @@ -6570,44 +7014,30 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; gen_op_mov_TN_reg(OT_BYTE, 0, R_AH); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); - s->cc_op = CC_OP_EFLAGS; break; case 0x9f: /* lahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s); /* Note: gen_compute_eflags() only gives the condition codes */ - tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02); + tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02); gen_op_mov_reg_T0(OT_BYTE, R_AH); break; case 0xf5: /* cmc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s); tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xf8: /* clc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xf9: /* stc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s); tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xfc: /* cld */ tcg_gen_movi_i32(cpu_tmp2_i32, 1); @@ -6697,7 +7127,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; } - s->cc_op = CC_OP_SARB + ot; + set_cc_op(s, CC_OP_SARB + ot); if (op != 0) { if (mod != 3) gen_op_st_T0_A0(ot + s->mem_index); @@ -6707,81 +7137,88 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_cc_dst, 0); } break; - case 0x1bc: /* bsf */ - case 0x1bd: /* bsr */ - { - int label1; - TCGv t0; - - ot = dflag + OT_WORD; - modrm = cpu_ldub_code(env, s->pc++); - reg = ((modrm >> 3) & 7) | rex_r; - gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0); - gen_extu(ot, cpu_T[0]); - t0 = tcg_temp_local_new(); - tcg_gen_mov_tl(t0, cpu_T[0]); - if ((b & 1) && (prefixes & PREFIX_REPZ) && - 
(s->cpuid_ext3_features & CPUID_EXT3_ABM)) { - switch(ot) { - case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(16)); break; - case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(32)); break; - case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(64)); break; - } - gen_op_mov_reg_T0(ot, reg); + case 0x1bc: /* bsf / tzcnt */ + case 0x1bd: /* bsr / lzcnt */ + ot = dflag + OT_WORD; + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_extu(ot, cpu_T[0]); + + /* Note that lzcnt and tzcnt are in different extensions. */ + if ((prefixes & PREFIX_REPZ) + && (b & 1 + ? s->cpuid_ext3_features & CPUID_EXT3_ABM + : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { + int size = 8 << ot; + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + if (b & 1) { + /* For lzcnt, reduce the target_ulong result by the + number of zeros that we expect to find at the top. */ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size); } else { - label1 = gen_new_label(); - tcg_gen_movi_tl(cpu_cc_dst, 0); - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); - if (b & 1) { - gen_helper_bsr(cpu_T[0], t0); - } else { - gen_helper_bsf(cpu_T[0], t0); - } - gen_op_mov_reg_T0(ot, reg); - tcg_gen_movi_tl(cpu_cc_dst, 1); - gen_set_label(label1); - tcg_gen_discard_tl(cpu_cc_src); - s->cc_op = CC_OP_LOGICB + ot; + /* For tzcnt, a zero input must return the operand size: + force all bits outside the operand size to 1. */ + target_ulong mask = (target_ulong)-2 << (size - 1); + tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask); + gen_helper_ctz(cpu_T[0], cpu_T[0]); } - tcg_temp_free(t0); + /* For lzcnt/tzcnt, C and Z bits are defined and are + related to the result. */ + gen_op_update1_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + } else { + /* For bsr/bsf, only the Z bit is defined and it is related + to the input and not the result. */ + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_LOGICB + ot); + if (b & 1) { + /* For bsr, return the bit index of the first 1 bit, + not the count of leading zeros. */ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1); + } else { + gen_helper_ctz(cpu_T[0], cpu_T[0]); + } + /* ??? The manual says that the output is undefined when the + input is zero, but real hardware leaves it unchanged, and + real programs appear to depend on that. 
*/ + tcg_gen_movi_tl(cpu_tmp0, 0); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0, + cpu_regs[reg], cpu_T[0]); } + gen_op_mov_reg_T0(ot, reg); break; /************************/ /* bcd */ case 0x27: /* daa */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_daa(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x2f: /* das */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_das(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x37: /* aaa */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_aaa(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x3f: /* aas */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_aas(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0xd4: /* aam */ if (CODE64(s)) @@ -6791,7 +7228,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); } else { gen_helper_aam(cpu_env, tcg_const_i32(val)); - s->cc_op = CC_OP_LOGICB; + set_cc_op(s, CC_OP_LOGICB); } break; case 0xd5: /* aad */ @@ -6799,7 +7236,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; val = cpu_ldub_code(env, s->pc++); gen_helper_aad(cpu_env, tcg_const_i32(val)); - s->cc_op = CC_OP_LOGICB; + set_cc_op(s, CC_OP_LOGICB); break; /************************/ /* misc */ @@ -6821,8 +7258,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, (HF_MP_MASK | HF_TS_MASK)) { gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fwait(cpu_env); } @@ -6841,8 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0xce: /* into */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start)); break; @@ -6935,9 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0xd6: /* salc */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags_c(cpu_T[0]); + gen_compute_eflags_c(s, cpu_T[0]); tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); gen_op_mov_reg_T0(OT_BYTE, R_EAX); break; @@ -6961,17 +7394,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, switch(b) { case 0: /* loopnz */ case 1: /* loopz */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); gen_op_add_reg_im(s->aflag, R_ECX, -1); gen_op_jz_ecx(s->aflag, l3); - gen_compute_eflags(cpu_tmp0); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z); - if (b == 0) { - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1); - } else { - tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1); - } + gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); break; case 2: /* loop */ gen_op_add_reg_im(s->aflag, R_ECX, -1); @@ -6998,8 +7423,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - 
gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (b & 2) { gen_helper_rdmsr(cpu_env); @@ -7009,8 +7433,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x131: /* rdtsc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (use_icount) gen_io_start(); @@ -7021,8 +7444,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x133: /* rdpmc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_rdpmc(cpu_env); break; @@ -7068,15 +7490,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_jmp_im(pc_start - s->cs_base); gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag)); /* condition codes are modified only in long mode */ - if (s->lma) - s->cc_op = CC_OP_EFLAGS; + if (s->lma) { + set_cc_op(s, CC_OP_EFLAGS); + } gen_eob(s); } break; #endif case 0x1a2: /* cpuid */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_cpuid(cpu_env); break; @@ -7084,8 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start)); s->is_jmp = DISAS_TB_JUMP; @@ -7147,14 +7568,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!s->pe || s->vm86) goto illegal_op; gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (op == 4) { gen_helper_verr(cpu_env, cpu_T[0]); } else { gen_helper_verw(cpu_env, cpu_T[0]); } - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; default: goto illegal_op; @@ -7186,8 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); #ifdef TARGET_X86_64 if (s->aflag == 2) { @@ -7247,8 +7666,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 2: /* lgdt */ case 3: /* lidt */ if (mod == 3) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); switch(rm) { case 0: /* VMRUN */ @@ -7376,8 +7794,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_invlpg(cpu_env, cpu_A0); @@ -7410,8 +7827,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 1: /* rdtscp */ if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (use_icount) gen_io_start(); @@ -7507,12 +7923,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else { gen_op_mov_reg_v(ot, rm, t0); } - if (s->cc_op != CC_OP_DYNAMIC) - 
gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); - s->cc_op = CC_OP_EFLAGS; tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); @@ -7530,8 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0); t0 = tcg_temp_local_new(); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (b == 0x102) { gen_helper_lar(t0, cpu_env, cpu_T[0]); } else { @@ -7542,7 +7954,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); gen_op_mov_reg_v(ot, reg, t0); gen_set_label(label1); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); tcg_temp_free(t0); } break; @@ -7596,8 +8008,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 3: case 4: case 8: - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (b & 2) { gen_op_mov_TN_reg(ot, 0, rm); @@ -7686,8 +8097,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; } gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2))); break; @@ -7700,8 +8110,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; } gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2))); @@ -7785,7 +8194,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot)); gen_op_mov_reg_T0(ot, reg); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x10e ... 0x10f: /* 3DNow! 
instructions, ignore prefixes */ @@ -7820,12 +8229,12 @@ void optimize_flags_init(void) cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, cc_op), "cc_op"); - cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src), - "cc_src"); cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst), "cc_dst"); - cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp), - "cc_tmp"); + cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src), + "cc_src"); + cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2), + "cc_src2"); #ifdef TARGET_X86_64 cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, @@ -7918,6 +8327,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, dc->tf = (flags >> TF_SHIFT) & 1; dc->singlestep_enabled = env->singlestep_enabled; dc->cc_op = CC_OP_DYNAMIC; + dc->cc_op_dirty = false; dc->cs_base = cs_base; dc->tb = tb; dc->popl_esp_hack = 0; @@ -7951,16 +8361,15 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, cpu_T[0] = tcg_temp_new(); cpu_T[1] = tcg_temp_new(); cpu_A0 = tcg_temp_new(); - cpu_T3 = tcg_temp_new(); cpu_tmp0 = tcg_temp_new(); cpu_tmp1_i64 = tcg_temp_new_i64(); cpu_tmp2_i32 = tcg_temp_new_i32(); cpu_tmp3_i32 = tcg_temp_new_i32(); cpu_tmp4 = tcg_temp_new(); - cpu_tmp5 = tcg_temp_new(); cpu_ptr0 = tcg_temp_new_ptr(); cpu_ptr1 = tcg_temp_new_ptr(); + cpu_cc_srcT = tcg_temp_local_new(); gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE; diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c index 6dc730d882..b05572b734 100644 --- a/tests/tcg/test-i386.c +++ b/tests/tcg/test-i386.c @@ -209,7 +209,7 @@ static inline long i2l(long v) #define TEST_LEA16(STR)\ {\ asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ - : "=wq" (res)\ + : "=r" (res)\ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ printf("lea %s = %08lx\n", STR, res);\ } @@ -925,7 +925,7 @@ void test_fbcd(double a) void test_fenv(void) { - struct QEMU_PACKED { + struct __attribute__((__packed__)) { uint16_t fpuc; uint16_t dummy1; uint16_t fpus; @@ -935,7 +935,7 @@ void test_fenv(void) uint32_t ignored[4]; long double fpregs[8]; } float_env32; - struct QEMU_PACKED { + struct __attribute__((__packed__)) { uint16_t fpuc; uint16_t fpus; uint16_t fptag; @@ -1280,7 +1280,7 @@ void test_segs(void) struct { uint32_t offset; uint16_t seg; - } QEMU_PACKED segoff; + } __attribute__((__packed__)) segoff; ldt.entry_number = 1; ldt.base_addr = (unsigned long)&seg_data1; @@ -1828,7 +1828,7 @@ void test_exceptions(void) printf("lock nop exception:\n"); if (setjmp(jmp_env) == 0) { /* now execute an invalid instruction */ - asm volatile(".byte 0xf0, 0x90"); /* lock nop */ + asm volatile(".byte 0xf0, 0x90"); } printf("INT exception:\n"); |
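
The comments in the tzcnt/lzcnt hunk above are terse, so here is a standalone C sketch of the same two bit tricks (illustrative only, not part of this commit). It assumes a 64-bit target_ulong; the names clz64, ctz64, lzcnt and tzcnt are hypothetical, with GCC's __builtin_clzll/__builtin_ctzll standing in for the TCG gen_helper_clz/gen_helper_ctz calls:

    /* Illustrative sketch -- not part of the patch.  lzcnt is derived
     * from a full-width clz by dropping the zeros contributed by the
     * zero-extension; tzcnt's zero-input case is handled by forcing
     * every bit at or above the operand size to 1.  */
    #include <assert.h>
    #include <stdint.h>

    /* Like QEMU's helpers, return 64 for a zero input (the builtins
     * alone are undefined there).  */
    static unsigned clz64(uint64_t x) { return x ? __builtin_clzll(x) : 64; }
    static unsigned ctz64(uint64_t x) { return x ? __builtin_ctzll(x) : 64; }

    /* lzcnt on a size-bit operand: count on the zero-extended 64-bit
     * value, then subtract the 64 - size leading zeros that the
     * extension introduced.  */
    static unsigned lzcnt(uint64_t x, unsigned size)
    {
        return clz64(x) - (64 - size);
    }

    /* tzcnt on a size-bit operand: set every bit at position >= size,
     * so a zero input finds its first 1 bit exactly at 'size'.  The
     * mask is the same (target_ulong)-2 << (size - 1) expression the
     * patch uses.  */
    static unsigned tzcnt(uint64_t x, unsigned size)
    {
        uint64_t mask = (uint64_t)-2 << (size - 1);
        return ctz64(x | mask);
    }

    int main(void)
    {
        assert(lzcnt(0x0001, 16) == 15);
        assert(lzcnt(0, 16) == 16);      /* zero input -> operand size */
        assert(tzcnt(0x8000, 16) == 15);
        assert(tzcnt(0, 32) == 32);      /* zero input -> operand size */
        return 0;
    }

In both functions a zero input yields the operand size, which is the architectural lzcnt/tzcnt result that the patch's zero-input handling (the clz helper's 64-for-zero return, and the forced mask bit) reproduces.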