aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Henderson <rth@twiddle.net> 2013-01-23 16:03:16 -0800
committer Richard Henderson <rth@twiddle.net> 2013-02-18 15:39:09 -0800
commit 988c3eb0d6f41ac13f4ec145c637f12c776de602 (patch)
tree 870e54c572872d4a04c06722b803fdb48e387e66
parent db9f2597722d5d8bc5f2330f186288d893114338 (diff)
target-i386: Use CC_SRC2 for ADC and SBB
Add another slot in ENV and store two of the three inputs. This lets us do less work when carry-out is not needed, and avoids the unpredictable CC_OP after translating these insns.

Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r-- target-i386/cc_helper.c 40
-rw-r--r-- target-i386/cc_helper_template.h 26
-rw-r--r-- target-i386/cpu.h 10
-rw-r--r-- target-i386/helper.h 4
-rw-r--r-- target-i386/translate.c 80
5 files changed, 75 insertions, 85 deletions
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index a5d8181804..218a9b519f 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,7 +75,8 @@ const uint8_t parity_table[256] = {
#endif
-target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
+ target_ulong src2, int op)
{
switch (op) {
default: /* should never happen */
@@ -99,11 +100,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
return compute_all_addl(dst, src1);
case CC_OP_ADCB:
- return compute_all_adcb(dst, src1);
+ return compute_all_adcb(dst, src1, src2);
case CC_OP_ADCW:
- return compute_all_adcw(dst, src1);
+ return compute_all_adcw(dst, src1, src2);
case CC_OP_ADCL:
- return compute_all_adcl(dst, src1);
+ return compute_all_adcl(dst, src1, src2);
case CC_OP_SUBB:
return compute_all_subb(dst, src1);
@@ -113,11 +114,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
return compute_all_subl(dst, src1);
case CC_OP_SBBB:
- return compute_all_sbbb(dst, src1);
+ return compute_all_sbbb(dst, src1, src2);
case CC_OP_SBBW:
- return compute_all_sbbw(dst, src1);
+ return compute_all_sbbw(dst, src1, src2);
case CC_OP_SBBL:
- return compute_all_sbbl(dst, src1);
+ return compute_all_sbbl(dst, src1, src2);
case CC_OP_LOGICB:
return compute_all_logicb(dst, src1);
@@ -160,11 +161,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
case CC_OP_ADDQ:
return compute_all_addq(dst, src1);
case CC_OP_ADCQ:
- return compute_all_adcq(dst, src1);
+ return compute_all_adcq(dst, src1, src2);
case CC_OP_SUBQ:
return compute_all_subq(dst, src1);
case CC_OP_SBBQ:
- return compute_all_sbbq(dst, src1);
+ return compute_all_sbbq(dst, src1, src2);
case CC_OP_LOGICQ:
return compute_all_logicq(dst, src1);
case CC_OP_INCQ:
@@ -181,10 +182,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
{
- return helper_cc_compute_all(CC_DST, CC_SRC, op);
+ return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
}
-target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
+ target_ulong src2, int op)
{
switch (op) {
default: /* should never happen */
@@ -225,11 +227,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
return compute_c_addl(dst, src1);
case CC_OP_ADCB:
- return compute_c_adcb(dst, src1);
+ return compute_c_adcb(dst, src1, src2);
case CC_OP_ADCW:
- return compute_c_adcw(dst, src1);
+ return compute_c_adcw(dst, src1, src2);
case CC_OP_ADCL:
- return compute_c_adcl(dst, src1);
+ return compute_c_adcl(dst, src1, src2);
case CC_OP_SUBB:
return compute_c_subb(dst, src1);
@@ -239,11 +241,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
return compute_c_subl(dst, src1);
case CC_OP_SBBB:
- return compute_c_sbbb(dst, src1);
+ return compute_c_sbbb(dst, src1, src2);
case CC_OP_SBBW:
- return compute_c_sbbw(dst, src1);
+ return compute_c_sbbw(dst, src1, src2);
case CC_OP_SBBL:
- return compute_c_sbbl(dst, src1);
+ return compute_c_sbbl(dst, src1, src2);
case CC_OP_SHLB:
return compute_c_shlb(dst, src1);
@@ -256,11 +258,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
case CC_OP_ADDQ:
return compute_c_addq(dst, src1);
case CC_OP_ADCQ:
- return compute_c_adcq(dst, src1);
+ return compute_c_adcq(dst, src1, src2);
case CC_OP_SUBQ:
return compute_c_subq(dst, src1);
case CC_OP_SBBQ:
- return compute_c_sbbq(dst, src1);
+ return compute_c_sbbq(dst, src1, src2);
case CC_OP_SHLQ:
return compute_c_shlq(dst, src1);
#endif
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 522b462285..87f47d2e97 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -58,12 +58,13 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
return dst < src1;
}
-static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+ DATA_TYPE src3)
{
int cf, pf, af, zf, sf, of;
- DATA_TYPE src2 = dst - src1 - 1;
+ DATA_TYPE src2 = dst - src1 - src3;
- cf = dst <= src1;
+ cf = (src3 ? dst <= src1 : dst < src1);
pf = parity_table[(uint8_t)dst];
af = (dst ^ src1 ^ src2) & 0x10;
zf = (dst == 0) << 6;
@@ -72,9 +73,10 @@ static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+ DATA_TYPE src3)
{
- return dst <= src1;
+ return src3 ? dst <= src1 : dst < src1;
}
static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -98,12 +100,13 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
return src1 < src2;
}
-static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+ DATA_TYPE src3)
{
int cf, pf, af, zf, sf, of;
- DATA_TYPE src1 = dst + src2 + 1;
+ DATA_TYPE src1 = dst + src2 + src3;
- cf = src1 <= src2;
+ cf = (src3 ? src1 <= src2 : src1 < src2);
pf = parity_table[(uint8_t)dst];
af = (dst ^ src1 ^ src2) & 0x10;
zf = (dst == 0) << 6;
@@ -112,11 +115,12 @@ static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+ DATA_TYPE src3)
{
- DATA_TYPE src1 = dst + src2 + 1;
+ DATA_TYPE src1 = dst + src2 + src3;
- return src1 <= src2;
+ return (src3 ? src1 <= src2 : src1 < src2);
}
static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 8c4c605299..1fa9dc8267 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -725,8 +725,9 @@ typedef struct CPUX86State {
stored elsewhere */
/* emulator internal eflags handling */
- target_ulong cc_src;
target_ulong cc_dst;
+ target_ulong cc_src;
+ target_ulong cc_src2;
uint32_t cc_op;
int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
@@ -1116,9 +1117,10 @@ static inline int cpu_mmu_index (CPUX86State *env)
#define EIP (env->eip)
#define DF (env->df)
-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP (env->cc_op)
+#define CC_DST (env->cc_dst)
+#define CC_SRC (env->cc_src)
+#define CC_SRC2 (env->cc_src2)
+#define CC_OP (env->cc_op)
/* n must be a constant to be efficient */
static inline target_long lshift(target_long x, int n)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 901ff73c12..4c46ab1b40 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
#include "exec/def-helper.h"
-DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
-DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_0(lock, void)
DEF_HELPER_0(unlock, void)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5235aff15e..f667f9333b 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -61,7 +61,7 @@
/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_A0;
-static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
/* local temps */
@@ -188,18 +188,19 @@ enum {
enum {
USES_CC_DST = 1,
USES_CC_SRC = 2,
- USES_CC_SRCT = 4,
+ USES_CC_SRC2 = 4,
+ USES_CC_SRCT = 8,
};
/* Bit set if the global variable is live after setting CC_OP to X. */
static const uint8_t cc_op_live[CC_OP_NB] = {
- [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
[CC_OP_EFLAGS] = USES_CC_SRC,
[CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
[CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
[CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
- [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
[CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
[CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
[CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
@@ -223,6 +224,9 @@ static void set_cc_op(DisasContext *s, CCOp op)
if (dead & USES_CC_SRC) {
tcg_gen_discard_tl(cpu_cc_src);
}
+ if (dead & USES_CC_SRC2) {
+ tcg_gen_discard_tl(cpu_cc_src2);
+ }
if (dead & USES_CC_SRCT) {
tcg_gen_discard_tl(cpu_cc_srcT);
}
@@ -867,6 +871,13 @@ static void gen_op_update2_cc(void)
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
+static void gen_op_update3_cc(TCGv reg)
+{
+ tcg_gen_mov_tl(cpu_cc_src2, reg);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
static inline void gen_op_testl_T0_T1_cc(void)
{
tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
@@ -882,7 +893,7 @@ static void gen_op_update_neg_cc(void)
/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
- TCGv zero, dst, src1;
+ TCGv zero, dst, src1, src2;
int live, dead;
if (s->cc_op == CC_OP_EFLAGS) {
@@ -892,10 +903,11 @@ static void gen_compute_eflags(DisasContext *s)
TCGV_UNUSED(zero);
dst = cpu_cc_dst;
src1 = cpu_cc_src;
+ src2 = cpu_cc_src2;
/* Take care to not read values that are not live. */
live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
- dead = live ^ (USES_CC_DST | USES_CC_SRC);
+ dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
if (dead) {
zero = tcg_const_tl(0);
if (dead & USES_CC_DST) {
@@ -904,10 +916,13 @@ static void gen_compute_eflags(DisasContext *s)
if (dead & USES_CC_SRC) {
src1 = zero;
}
+ if (dead & USES_CC_SRC2) {
+ src2 = zero;
+ }
}
gen_update_cc_op(s);
- gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op);
+ gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
set_cc_op(s, CC_OP_EFLAGS);
if (dead) {
@@ -951,30 +966,6 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
.reg2 = t1, .mask = -1, .use_reg2 = true };
- case CC_OP_SBBB ... CC_OP_SBBQ:
- /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
- size = s->cc_op - CC_OP_SBBB;
- t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
- if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) {
- tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
- t1 = cpu_tmp0;
- }
-
- tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
- tcg_gen_addi_tl(reg, reg, 1);
- gen_extu(size, reg);
- t0 = reg;
- goto adc_sbb;
-
- case CC_OP_ADCB ... CC_OP_ADCQ:
- /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
- size = s->cc_op - CC_OP_ADCB;
- t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
- t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
- adc_sbb:
- return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
- .reg2 = t1, .mask = -1, .use_reg2 = true };
-
case CC_OP_LOGICB ... CC_OP_LOGICQ:
return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
@@ -1004,7 +995,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
/* The need to compute only C from CC_OP_DYNAMIC is important
in efficiently implementing e.g. INC at the start of a TB. */
gen_update_cc_op(s);
- gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op);
+ gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
+ cpu_cc_src2, cpu_cc_op);
return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
.mask = -1, .no_setcond = true };
}
@@ -1442,18 +1434,10 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
- tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
- tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
- set_cc_op(s1, CC_OP_DYNAMIC);
+ gen_op_update3_cc(cpu_tmp4);
+ set_cc_op(s1, CC_OP_ADCB + ot);
break;
case OP_SBBL:
- /*
- * No need to store cpu_cc_srcT, because it is used only
- * when the cc_op is known.
- */
gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
@@ -1461,12 +1445,8 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
- tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
- tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
- set_cc_op(s1, CC_OP_DYNAMIC);
+ gen_op_update3_cc(cpu_tmp4);
+ set_cc_op(s1, CC_OP_SBBB + ot);
break;
case OP_ADDL:
gen_op_addl_T0_T1();
@@ -7788,6 +7768,8 @@ void optimize_flags_init(void)
"cc_dst");
cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
"cc_src");
+ cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
+ "cc_src2");
#ifdef TARGET_X86_64
cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,