aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Henderson <rth@twiddle.net> 2010-05-12 11:04:27 -0700
committer Blue Swirl <blauwirbel@gmail.com> 2010-05-20 19:58:28 +0000
commit 70c482852aed861d728654c7bad9404eff76d9e3 (patch)
tree da2b4b73bce3b8047b615f13a8424f9650a799c2
parent da441cffde8f593404e4187c57abf392717da2bc (diff)
target-sparc: Inline some generation of carry for ADDX/SUBX.
Computing carry is trivial for some inputs. By avoiding an external function call, we generate near-optimal code for the common cases of add+addx (double-word arithmetic) and cmp+addx (a setcc pattern).

Signed-off-by: Richard Henderson <rth@twiddle.net>
Acked-by: Artyom Tarasenko <atar4qemu@gmail.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
-rw-r--r-- target-sparc/helper.h 2
-rw-r--r-- target-sparc/op_helper.c 2
-rw-r--r-- target-sparc/translate.c 272
3 files changed, 200 insertions, 76 deletions
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 04c1306d69..6f103e7697 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -158,6 +158,6 @@ VIS_CMPHELPER(cmpne);
#undef VIS_HELPER
#undef VIS_CMPHELPER
DEF_HELPER_0(compute_psr, void);
-DEF_HELPER_0(compute_C_icc, tl);
+DEF_HELPER_0(compute_C_icc, i32);
#include "def-helper.h"
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index 6deebd35d2..d0bc27766e 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -1347,7 +1347,7 @@ void helper_compute_psr(void)
CC_OP = CC_OP_FLAGS;
}
-target_ulong helper_compute_C_icc(void)
+uint32_t helper_compute_C_icc(void)
{
uint32_t ret;
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ea7c71b85a..8129b79d16 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -332,24 +332,132 @@ static inline void gen_op_add_cc(TCGv dst, TCGv src1, TCGv src2)
tcg_gen_mov_tl(dst, cpu_cc_dst);
}
-static inline void gen_op_addxi_cc(TCGv dst, TCGv src1, target_long src2)
+static TCGv_i32 gen_add32_carry32(void)
{
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_mov_tl(cpu_cc_src, src1);
- tcg_gen_movi_tl(cpu_cc_src2, src2);
- tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
- tcg_gen_addi_tl(cpu_cc_dst, cpu_cc_dst, src2);
- tcg_gen_mov_tl(dst, cpu_cc_dst);
+ TCGv_i32 carry_32, cc_src1_32, cc_src2_32;
+
+ /* Carry is computed from a previous add: (dst < src) */
+#if TARGET_LONG_BITS == 64
+ cc_src1_32 = tcg_temp_new_i32();
+ cc_src2_32 = tcg_temp_new_i32();
+ tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_dst);
+ tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src);
+#else
+ cc_src1_32 = cpu_cc_dst;
+ cc_src2_32 = cpu_cc_src;
+#endif
+
+ carry_32 = tcg_temp_new_i32();
+ tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32);
+
+#if TARGET_LONG_BITS == 64
+ tcg_temp_free_i32(cc_src1_32);
+ tcg_temp_free_i32(cc_src2_32);
+#endif
+
+ return carry_32;
}
-static inline void gen_op_addx_cc(TCGv dst, TCGv src1, TCGv src2)
+static TCGv_i32 gen_sub32_carry32(void)
{
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_mov_tl(cpu_cc_src, src1);
- tcg_gen_mov_tl(cpu_cc_src2, src2);
- tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
- tcg_gen_add_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2);
- tcg_gen_mov_tl(dst, cpu_cc_dst);
+ TCGv_i32 carry_32, cc_src1_32, cc_src2_32;
+
+ /* Carry is computed from a previous borrow: (src1 < src2) */
+#if TARGET_LONG_BITS == 64
+ cc_src1_32 = tcg_temp_new_i32();
+ cc_src2_32 = tcg_temp_new_i32();
+ tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_src);
+ tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src2);
+#else
+ cc_src1_32 = cpu_cc_src;
+ cc_src2_32 = cpu_cc_src2;
+#endif
+
+ carry_32 = tcg_temp_new_i32();
+ tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32);
+
+#if TARGET_LONG_BITS == 64
+ tcg_temp_free_i32(cc_src1_32);
+ tcg_temp_free_i32(cc_src2_32);
+#endif
+
+ return carry_32;
+}
+
+static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1,
+ TCGv src2, int update_cc)
+{
+ TCGv_i32 carry_32;
+ TCGv carry;
+
+ switch (dc->cc_op) {
+ case CC_OP_DIV:
+ case CC_OP_LOGIC:
+ /* Carry is known to be zero. Fall back to plain ADD. */
+ if (update_cc) {
+ gen_op_add_cc(dst, src1, src2);
+ } else {
+ tcg_gen_add_tl(dst, src1, src2);
+ }
+ return;
+
+ case CC_OP_ADD:
+ case CC_OP_TADD:
+ case CC_OP_TADDTV:
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ {
+ /* For 32-bit hosts, we can re-use the host's hardware carry
+ generation by using an ADD2 opcode. We discard the low
+ part of the output. Ideally we'd combine this operation
+ with the add that generated the carry in the first place. */
+ TCGv dst_low = tcg_temp_new();
+ tcg_gen_op6_i32(INDEX_op_add2_i32, dst_low, dst,
+ cpu_cc_src, src1, cpu_cc_src2, src2);
+ tcg_temp_free(dst_low);
+ goto add_done;
+ }
+#endif
+ carry_32 = gen_add32_carry32();
+ break;
+
+ case CC_OP_SUB:
+ case CC_OP_TSUB:
+ case CC_OP_TSUBTV:
+ carry_32 = gen_sub32_carry32();
+ break;
+
+ default:
+ /* We need external help to produce the carry. */
+ carry_32 = tcg_temp_new_i32();
+ gen_helper_compute_C_icc(carry_32);
+ break;
+ }
+
+#if TARGET_LONG_BITS == 64
+ carry = tcg_temp_new();
+ tcg_gen_extu_i32_i64(carry, carry_32);
+#else
+ carry = carry_32;
+#endif
+
+ tcg_gen_add_tl(dst, src1, src2);
+ tcg_gen_add_tl(dst, dst, carry);
+
+ tcg_temp_free_i32(carry_32);
+#if TARGET_LONG_BITS == 64
+ tcg_temp_free(carry);
+#endif
+
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ add_done:
+#endif
+ if (update_cc) {
+ tcg_gen_mov_tl(cpu_cc_src, src1);
+ tcg_gen_mov_tl(cpu_cc_src2, src2);
+ tcg_gen_mov_tl(cpu_cc_dst, dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
+ dc->cc_op = CC_OP_ADDX;
+ }
}
static inline void gen_op_tadd_cc(TCGv dst, TCGv src1, TCGv src2)
@@ -415,24 +523,80 @@ static inline void gen_op_sub_cc(TCGv dst, TCGv src1, TCGv src2)
tcg_gen_mov_tl(dst, cpu_cc_dst);
}
-static inline void gen_op_subxi_cc(TCGv dst, TCGv src1, target_long src2)
+static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
+ TCGv src2, int update_cc)
{
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_mov_tl(cpu_cc_src, src1);
- tcg_gen_movi_tl(cpu_cc_src2, src2);
- tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
- tcg_gen_subi_tl(cpu_cc_dst, cpu_cc_dst, src2);
- tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
+ TCGv_i32 carry_32;
+ TCGv carry;
-static inline void gen_op_subx_cc(TCGv dst, TCGv src1, TCGv src2)
-{
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_mov_tl(cpu_cc_src, src1);
- tcg_gen_mov_tl(cpu_cc_src2, src2);
- tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0);
- tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2);
- tcg_gen_mov_tl(dst, cpu_cc_dst);
+ switch (dc->cc_op) {
+ case CC_OP_DIV:
+ case CC_OP_LOGIC:
+ /* Carry is known to be zero. Fall back to plain SUB. */
+ if (update_cc) {
+ gen_op_sub_cc(dst, src1, src2);
+ } else {
+ tcg_gen_sub_tl(dst, src1, src2);
+ }
+ return;
+
+ case CC_OP_ADD:
+ case CC_OP_TADD:
+ case CC_OP_TADDTV:
+ carry_32 = gen_add32_carry32();
+ break;
+
+ case CC_OP_SUB:
+ case CC_OP_TSUB:
+ case CC_OP_TSUBTV:
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ {
+ /* For 32-bit hosts, we can re-use the host's hardware carry
+ generation by using a SUB2 opcode. We discard the low
+ part of the output. Ideally we'd combine this operation
+ with the sub that generated the carry in the first place. */
+ TCGv dst_low = tcg_temp_new();
+ tcg_gen_op6_i32(INDEX_op_sub2_i32, dst_low, dst,
+ cpu_cc_src, src1, cpu_cc_src2, src2);
+ tcg_temp_free(dst_low);
+ goto sub_done;
+ }
+#endif
+ carry_32 = gen_sub32_carry32();
+ break;
+
+ default:
+ /* We need external help to produce the carry. */
+ carry_32 = tcg_temp_new_i32();
+ gen_helper_compute_C_icc(carry_32);
+ break;
+ }
+
+#if TARGET_LONG_BITS == 64
+ carry = tcg_temp_new();
+ tcg_gen_extu_i32_i64(carry, carry_32);
+#else
+ carry = carry_32;
+#endif
+
+ tcg_gen_sub_tl(dst, src1, src2);
+ tcg_gen_sub_tl(dst, dst, carry);
+
+ tcg_temp_free_i32(carry_32);
+#if TARGET_LONG_BITS == 64
+ tcg_temp_free(carry);
+#endif
+
+#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
+ sub_done:
+#endif
+ if (update_cc) {
+ tcg_gen_mov_tl(cpu_cc_src, src1);
+ tcg_gen_mov_tl(cpu_cc_src2, src2);
+ tcg_gen_mov_tl(cpu_cc_dst, dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
+ dc->cc_op = CC_OP_SUBX;
+ }
}
static inline void gen_op_tsub_cc(TCGv dst, TCGv src1, TCGv src2)
@@ -2950,28 +3114,8 @@ static void disas_sparc_insn(DisasContext * dc)
}
break;
case 0x8: /* addx, V9 addc */
- if (IS_IMM) {
- simm = GET_FIELDs(insn, 19, 31);
- if (xop & 0x10) {
- gen_op_addxi_cc(cpu_dst, cpu_src1, simm);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
- dc->cc_op = CC_OP_ADDX;
- } else {
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm);
- tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0);
- }
- } else {
- if (xop & 0x10) {
- gen_op_addx_cc(cpu_dst, cpu_src1, cpu_src2);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX);
- dc->cc_op = CC_OP_ADDX;
- } else {
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0);
- tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0);
- }
- }
+ gen_op_addx_int(dc, cpu_dst, cpu_src1, cpu_src2,
+ (xop & 0x10));
break;
#ifdef TARGET_SPARC64
case 0x9: /* V9 mulx */
@@ -3002,28 +3146,8 @@ static void disas_sparc_insn(DisasContext * dc)
}
break;
case 0xc: /* subx, V9 subc */
- if (IS_IMM) {
- simm = GET_FIELDs(insn, 19, 31);
- if (xop & 0x10) {
- gen_op_subxi_cc(cpu_dst, cpu_src1, simm);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
- dc->cc_op = CC_OP_SUBX;
- } else {
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm);
- tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0);
- }
- } else {
- if (xop & 0x10) {
- gen_op_subx_cc(cpu_dst, cpu_src1, cpu_src2);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX);
- dc->cc_op = CC_OP_SUBX;
- } else {
- gen_helper_compute_C_icc(cpu_tmp0);
- tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0);
- tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0);
- }
- }
+ gen_op_subx_int(dc, cpu_dst, cpu_src1, cpu_src2,
+ (xop & 0x10));
break;
#ifdef TARGET_SPARC64
case 0xd: /* V9 udivx */