diff options
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/arm/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/hppa/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/i386/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/ia64/tcg-target.h | 3 | ||||
-rw-r--r-- | tcg/mips/tcg-target.h | 3 | ||||
-rw-r--r-- | tcg/optimize.c | 471 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/ppc64/tcg-target.h | 1 | ||||
-rw-r--r-- | tcg/s390/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/sparc/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/tcg-op.h | 11 | ||||
-rw-r--r-- | tcg/tcg.c | 57 | ||||
-rw-r--r-- | tcg/tcg.h | 2 | ||||
-rw-r--r-- | tcg/tci/tcg-target.h | 3 |
14 files changed, 378 insertions, 185 deletions
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 0df33520f8..98fa11b286 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -75,8 +75,6 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 -#define TCG_TARGET_HAS_GUEST_BASE - enum { TCG_AREG0 = TCG_REG_R6, }; diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 5351353719..f43fb419ae 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -103,8 +103,6 @@ typedef enum { #define TCG_TARGET_HAS_ext8u_i32 0 /* and rd, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* and rd, rs, 0xffff */ -#define TCG_TARGET_HAS_GUEST_BASE - #define TCG_AREG0 TCG_REG_R17 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index ace63ba37b..dbc6756414 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -125,8 +125,6 @@ typedef enum { ((ofs) == 0 && (len) == 16)) #define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid -#define TCG_TARGET_HAS_GUEST_BASE - #if TCG_TARGET_REG_BITS == 64 # define TCG_AREG0 TCG_REG_R14 #else diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 4255ca5225..91fe7a3b06 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -147,9 +147,6 @@ typedef enum { #define TCG_AREG0 TCG_REG_R7 -/* Guest base is supported */ -#define TCG_TARGET_HAS_GUEST_BASE - static inline void flush_icache_range(tcg_target_ulong start, tcg_target_ulong stop) { diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7020d65845..65b5c59e89 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -116,9 +116,6 @@ typedef enum { #define TCG_AREG0 TCG_REG_S0 -/* guest base is supported */ -#define TCG_TARGET_HAS_GUEST_BASE - #ifdef __OpenBSD__ #include <machine/sysarch.h> #else diff --git a/tcg/optimize.c b/tcg/optimize.c index edb2b0ea90..a06c8eb43e 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) return res; } +static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) +{ + switch (c) { + case TCG_COND_EQ: + return x == y; + case TCG_COND_NE: + return x != y; + case TCG_COND_LT: + return (int32_t)x < (int32_t)y; + case TCG_COND_GE: + return (int32_t)x >= (int32_t)y; + case TCG_COND_LE: + return (int32_t)x <= (int32_t)y; + case TCG_COND_GT: + return (int32_t)x > (int32_t)y; + case TCG_COND_LTU: + return x < y; + case TCG_COND_GEU: + return x >= y; + case TCG_COND_LEU: + return x <= y; + case TCG_COND_GTU: + return x > y; + default: + tcg_abort(); + } +} + +static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) +{ + switch (c) { + case TCG_COND_EQ: + return x == y; + case TCG_COND_NE: + return x != y; + case TCG_COND_LT: + return (int64_t)x < (int64_t)y; + case TCG_COND_GE: + return (int64_t)x >= (int64_t)y; + case TCG_COND_LE: + return (int64_t)x <= (int64_t)y; + case TCG_COND_GT: + return (int64_t)x > (int64_t)y; + case TCG_COND_LTU: + return x < y; + case TCG_COND_GEU: + return x >= y; + case TCG_COND_LEU: + return x <= y; + case TCG_COND_GTU: + return x > y; + default: + tcg_abort(); + } +} + +static bool do_constant_folding_cond_eq(TCGCond c) +{ + switch (c) { + case TCG_COND_GT: + case TCG_COND_LTU: + case TCG_COND_LT: + case TCG_COND_GTU: + case TCG_COND_NE: + return 0; + case TCG_COND_GE: + case TCG_COND_GEU: + case TCG_COND_LE: + case TCG_COND_LEU: + case TCG_COND_EQ: + return 1; + default: + tcg_abort(); + } +} + /* Return 2 if the condition can't be simplified, and the result of the condition (0 or 1) if it can */ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, @@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { switch (op_bits(op)) { case 32: - switch (c) { - case TCG_COND_EQ: - return (uint32_t)temps[x].val == (uint32_t)temps[y].val; - case TCG_COND_NE: - return (uint32_t)temps[x].val != (uint32_t)temps[y].val; - case TCG_COND_LT: - return (int32_t)temps[x].val < (int32_t)temps[y].val; - case TCG_COND_GE: - return (int32_t)temps[x].val >= (int32_t)temps[y].val; - case TCG_COND_LE: - return (int32_t)temps[x].val <= (int32_t)temps[y].val; - case TCG_COND_GT: - return (int32_t)temps[x].val > (int32_t)temps[y].val; - case TCG_COND_LTU: - return (uint32_t)temps[x].val < (uint32_t)temps[y].val; - case TCG_COND_GEU: - return (uint32_t)temps[x].val >= (uint32_t)temps[y].val; - case TCG_COND_LEU: - return (uint32_t)temps[x].val <= (uint32_t)temps[y].val; - case TCG_COND_GTU: - return (uint32_t)temps[x].val > (uint32_t)temps[y].val; - default: - break; - } - break; + return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); case 64: - switch (c) { - case TCG_COND_EQ: - return (uint64_t)temps[x].val == (uint64_t)temps[y].val; - case TCG_COND_NE: - return (uint64_t)temps[x].val != (uint64_t)temps[y].val; - case TCG_COND_LT: - return (int64_t)temps[x].val < (int64_t)temps[y].val; - case TCG_COND_GE: - return (int64_t)temps[x].val >= (int64_t)temps[y].val; - case TCG_COND_LE: - return (int64_t)temps[x].val <= (int64_t)temps[y].val; - case TCG_COND_GT: - return (int64_t)temps[x].val > (int64_t)temps[y].val; - case TCG_COND_LTU: - return (uint64_t)temps[x].val < (uint64_t)temps[y].val; - case TCG_COND_GEU: - return (uint64_t)temps[x].val >= (uint64_t)temps[y].val; - case TCG_COND_LEU: - return (uint64_t)temps[x].val <= (uint64_t)temps[y].val; - case TCG_COND_GTU: - return (uint64_t)temps[x].val > (uint64_t)temps[y].val; - default: - break; - } - break; - } - } else if (temps_are_copies(x, y)) { - switch (c) { - case TCG_COND_GT: - case TCG_COND_LTU: - case TCG_COND_LT: - case TCG_COND_GTU: - case TCG_COND_NE: - return 0; - case TCG_COND_GE: - case TCG_COND_GEU: - case TCG_COND_LE: - case TCG_COND_LEU: - case TCG_COND_EQ: - return 1; + return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); default: - break; + tcg_abort(); } + } else if (temps_are_copies(x, y)) { + return do_constant_folding_cond_eq(c); } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { switch (c) { case TCG_COND_LTU: @@ -381,11 +396,73 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, } else { return 2; } +} - fprintf(stderr, - "Unrecognized bitness %d or condition %d in " - "do_constant_folding_cond.\n", op_bits(op), c); - tcg_abort(); +/* Return 2 if the condition can't be simplified, and the result + of the condition (0 or 1) if it can */ +static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) +{ + TCGArg al = p1[0], ah = p1[1]; + TCGArg bl = p2[0], bh = p2[1]; + + if (temps[bl].state == TCG_TEMP_CONST + && temps[bh].state == TCG_TEMP_CONST) { + uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; + + if (temps[al].state == TCG_TEMP_CONST + && temps[ah].state == TCG_TEMP_CONST) { + uint64_t a; + a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; + return do_constant_folding_cond_64(a, b, c); + } + if (b == 0) { + switch (c) { + case TCG_COND_LTU: + return 0; + case TCG_COND_GEU: + return 1; + default: + break; + } + } + } + if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { + return do_constant_folding_cond_eq(c); + } + return 2; +} + +static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) +{ + TCGArg a1 = *p1, a2 = *p2; + int sum = 0; + sum += temps[a1].state == TCG_TEMP_CONST; + sum -= temps[a2].state == TCG_TEMP_CONST; + + /* Prefer the constant in second argument, and then the form + op a, a, b, which is better handled on non-RISC hosts. */ + if (sum > 0 || (sum == 0 && dest == a2)) { + *p1 = a2; + *p2 = a1; + return true; + } + return false; +} + +static bool swap_commutative2(TCGArg *p1, TCGArg *p2) +{ + int sum = 0; + sum += temps[p1[0]].state == TCG_TEMP_CONST; + sum += temps[p1[1]].state == TCG_TEMP_CONST; + sum -= temps[p2[0]].state == TCG_TEMP_CONST; + sum -= temps[p2[1]].state == TCG_TEMP_CONST; + if (sum > 0) { + TCGArg t; + t = p1[0], p1[0] = p2[0], p2[0] = t; + t = p1[1], p1[1] = p2[1], p2[1] = t; + return true; + } + return false; } /* Propagate constants and copies, fold constant expressions. */ @@ -397,7 +474,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, const TCGOpDef *def; TCGArg *gen_args; TCGArg tmp; - TCGCond cond; /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. @@ -440,52 +516,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): - /* Prefer the constant in second argument, and then the form - op a, a, b, which is better handled on non-RISC hosts. */ - if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2] - && temps[args[2]].state != TCG_TEMP_CONST)) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; - } + swap_commutative(args[0], &args[1], &args[2]); break; CASE_OP_32_64(brcond): - if (temps[args[0]].state == TCG_TEMP_CONST - && temps[args[1]].state != TCG_TEMP_CONST) { - tmp = args[0]; - args[0] = args[1]; - args[1] = tmp; + if (swap_commutative(-1, &args[0], &args[1])) { args[2] = tcg_swap_cond(args[2]); } break; CASE_OP_32_64(setcond): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state != TCG_TEMP_CONST) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; + if (swap_commutative(args[0], &args[1], &args[2])) { args[3] = tcg_swap_cond(args[3]); } break; CASE_OP_32_64(movcond): - cond = args[5]; - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state != TCG_TEMP_CONST) { - tmp = args[1]; - args[1] = args[2]; - args[2] = tmp; - cond = tcg_swap_cond(cond); + if (swap_commutative(-1, &args[1], &args[2])) { + args[5] = tcg_swap_cond(args[5]); } /* For movcond, we canonicalize the "false" input reg to match the destination reg so that the tcg backend can implement a "move if true" operation. */ - if (args[0] == args[3]) { - tmp = args[3]; - args[3] = args[4]; - args[4] = tmp; - cond = tcg_invert_cond(cond); + if (swap_commutative(args[0], &args[4], &args[3])) { + args[5] = tcg_invert_cond(args[5]); } - args[5] = cond; + break; + case INDEX_op_add2_i32: + swap_commutative(args[0], &args[2], &args[4]); + swap_commutative(args[1], &args[3], &args[5]); + break; + case INDEX_op_mulu2_i32: + swap_commutative(args[0], &args[2], &args[3]); + break; + case INDEX_op_brcond2_i32: + if (swap_commutative2(&args[0], &args[2])) { + args[4] = tcg_swap_cond(args[4]); + } + break; + case INDEX_op_setcond2_i32: + if (swap_commutative2(&args[1], &args[3])) { + args[5] = tcg_swap_cond(args[5]); + } + break; default: break; } @@ -622,6 +692,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_args += 2; args += 2; break; + CASE_OP_32_64(not): CASE_OP_32_64(neg): CASE_OP_32_64(ext8s): @@ -634,14 +705,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, gen_opc_buf[op_index] = op_to_movi(op); tmp = do_constant_folding(op, temps[args[1]].val, 0); tcg_opt_gen_movi(gen_args, args[0], tmp); - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; + gen_args += 2; + args += 2; + break; } - gen_args += 2; - args += 2; - break; + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): @@ -665,15 +734,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, temps[args[2]].val); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args += 3; + args += 3; + break; } - args += 3; - break; + goto do_default; + CASE_OP_32_64(deposit): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { @@ -683,33 +748,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, | ((temps[args[2]].val & tmp) << args[3]); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args[4] = args[4]; - gen_args += 5; + args += 5; + break; } - args += 5; - break; + goto do_default; + CASE_OP_32_64(setcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); if (tmp != 2) { gen_opc_buf[op_index] = op_to_movi(op); tcg_opt_gen_movi(gen_args, args[0], tmp); gen_args += 2; - } else { - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args += 4; + args += 4; + break; } - args += 4; - break; + goto do_default; + CASE_OP_32_64(brcond): tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); if (tmp != 2) { @@ -721,17 +775,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } else { gen_opc_buf[op_index] = INDEX_op_nop; } - } else { - memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); - reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args += 4; + args += 4; + break; } - args += 4; - break; + goto do_default; + CASE_OP_32_64(movcond): tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); if (tmp != 2) { @@ -746,18 +794,125 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); gen_args += 2; } + args += 6; + break; + } + goto do_default; + + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + if (temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[4]].state == TCG_TEMP_CONST + && temps[args[5]].state == TCG_TEMP_CONST) { + uint32_t al = temps[args[2]].val; + uint32_t ah = temps[args[3]].val; + uint32_t bl = temps[args[4]].val; + uint32_t bh = temps[args[5]].val; + uint64_t a = ((uint64_t)ah << 32) | al; + uint64_t b = ((uint64_t)bh << 32) | bl; + TCGArg rl, rh; + + if (op == INDEX_op_add2_i32) { + a += b; + } else { + a -= b; + } + + /* We emit the extra nop when we emit the add2/sub2. */ + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + + rl = args[0]; + rh = args[1]; + gen_opc_buf[op_index] = INDEX_op_movi_i32; + gen_opc_buf[++op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); + tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); + gen_args += 4; + args += 6; + break; + } + goto do_default; + + case INDEX_op_mulu2_i32: + if (temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST) { + uint32_t a = temps[args[2]].val; + uint32_t b = temps[args[3]].val; + uint64_t r = (uint64_t)a * b; + TCGArg rl, rh; + + /* We emit the extra nop when we emit the mulu2. */ + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + + rl = args[0]; + rh = args[1]; + gen_opc_buf[op_index] = INDEX_op_movi_i32; + gen_opc_buf[++op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); + tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); + gen_args += 4; + args += 4; + break; + } + goto do_default; + + case INDEX_op_brcond2_i32: + tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); + if (tmp != 2) { + if (tmp) { + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + gen_opc_buf[op_index] = INDEX_op_br; + gen_args[0] = args[5]; + gen_args += 1; + } else { + gen_opc_buf[op_index] = INDEX_op_nop; + } + } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) + && temps[args[2]].state == TCG_TEMP_CONST + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[2]].val == 0 + && temps[args[3]].val == 0) { + /* Simplify LT/GE comparisons vs zero to a single compare + vs the high word of the input. */ + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); + gen_opc_buf[op_index] = INDEX_op_brcond_i32; + gen_args[0] = args[1]; + gen_args[1] = args[3]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; } else { - reset_temp(args[0]); + goto do_default; + } + args += 6; + break; + + case INDEX_op_setcond2_i32: + tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); + if (tmp != 2) { + gen_opc_buf[op_index] = INDEX_op_movi_i32; + tcg_opt_gen_movi(gen_args, args[0], tmp); + gen_args += 2; + } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) + && temps[args[3]].state == TCG_TEMP_CONST + && temps[args[4]].state == TCG_TEMP_CONST + && temps[args[3]].val == 0 + && temps[args[4]].val == 0) { + /* Simplify LT/GE comparisons vs zero to a single compare + vs the high word of the input. */ + gen_opc_buf[op_index] = INDEX_op_setcond_i32; gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[2]; - gen_args[3] = args[3]; - gen_args[4] = args[4]; - gen_args[5] = args[5]; - gen_args += 6; + gen_args[1] = args[2]; + gen_args[2] = args[4]; + gen_args[3] = args[5]; + gen_args += 4; + } else { + goto do_default; } args += 6; break; + case INDEX_op_call: nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { @@ -776,11 +931,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, i--; } break; + default: - /* Default case: we do know nothing about operation so no - propagation is done. We trash everything if the operation - is the end of a basic block, otherwise we only trash the - output args. */ + do_default: + /* Default case: we know nothing about operation (or were unable + to compute the operation result) so no propagation is done. + We trash everything if the operation is the end of a basic + block, otherwise we only trash the output args. */ if (def->flags & TCG_OPF_BB_END) { memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); } else { diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 3259d898ab..ad433ae5bb 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -96,8 +96,6 @@ typedef enum { #define TCG_AREG0 TCG_REG_R27 -#define TCG_TARGET_HAS_GUEST_BASE - #define tcg_qemu_tb_exec(env, tb_ptr) \ ((long __attribute__ ((longcall)) \ (*)(void *, void *))code_gen_prologue)(env, tb_ptr) diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 57569e8938..97fc5c9885 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -108,5 +108,4 @@ typedef enum { #define TCG_AREG0 TCG_REG_R27 -#define TCG_TARGET_HAS_GUEST_BASE #define TCG_TARGET_EXTEND_ARGS 1 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index ed55c331c6..a0181aef74 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -88,8 +88,6 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_movcond_i64 0 #endif -#define TCG_TARGET_HAS_GUEST_BASE - /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R15 #define TCG_TARGET_STACK_ALIGN 8 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index c2fbb23abb..0e7d398a41 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -125,8 +125,6 @@ typedef enum { #define TCG_TARGET_HAS_movcond_i64 1 #endif -#define TCG_TARGET_HAS_GUEST_BASE - #define TCG_AREG0 TCG_REG_I0 static inline void flush_icache_range(tcg_target_ulong start, diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 551845801d..8100a5a2e0 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -25,6 +25,11 @@ int gen_new_label(void); +static inline void tcg_gen_op0(TCGOpcode opc) +{ + *gen_opc_ptr++ = opc; +} + static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1) { *gen_opc_ptr++ = opc; @@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + /* Allow the optimizer room to replace add2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); } static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) @@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + /* Allow the optimizer room to replace sub2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); } static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) @@ -1018,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), TCGV_LOW(arg1), TCGV_LOW(arg2)); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); @@ -62,10 +62,6 @@ #include "elf.h" -#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE) -#error GUEST_BASE not supported on this host. -#endif - /* Forward declarations for functions declared in tcg-target.c and used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); @@ -1307,8 +1303,58 @@ static void tcg_liveness_analysis(TCGContext *s) break; case INDEX_op_end: break; - /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ + + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + args -= 6; + nb_iargs = 4; + nb_oargs = 2; + /* Test if the high part of the operation is dead, but not + the low part. The result can be optimized to a simple + add or sub. This happens often for x86_64 guest when the + cpu mode is set to 32 bit. */ + if (dead_temps[args[1]]) { + if (dead_temps[args[0]]) { + goto do_remove; + } + /* Create the single operation plus nop. */ + if (op == INDEX_op_add2_i32) { + op = INDEX_op_add_i32; + } else { + op = INDEX_op_sub_i32; + } + gen_opc_buf[op_index] = op; + args[1] = args[2]; + args[2] = args[4]; + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3); + /* Fall through and mark the single-word operation live. */ + nb_iargs = 2; + nb_oargs = 1; + } + goto do_not_remove; + + case INDEX_op_mulu2_i32: + args -= 4; + nb_iargs = 2; + nb_oargs = 2; + /* Likewise, test for the high part of the operation dead. */ + if (dead_temps[args[1]]) { + if (dead_temps[args[0]]) { + goto do_remove; + } + gen_opc_buf[op_index] = op = INDEX_op_mul_i32; + args[1] = args[2]; + args[2] = args[3]; + assert(gen_opc_buf[op_index + 1] == INDEX_op_nop); + tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1); + /* Fall through and mark the single-word operation live. */ + nb_oargs = 1; + } + goto do_not_remove; + default: + /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ args -= def->nb_args; nb_iargs = def->nb_iargs; nb_oargs = def->nb_oargs; @@ -1322,6 +1368,7 @@ static void tcg_liveness_analysis(TCGContext *s) if (!dead_temps[arg]) goto do_not_remove; } + do_remove: tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args); #ifdef CONFIG_PROFILER s->del_op_count++; @@ -616,7 +616,7 @@ TCGv_i64 tcg_const_i64(int64_t val); TCGv_i32 tcg_const_local_i32(int32_t val); TCGv_i64 tcg_const_local_i64(int64_t val); -extern uint8_t code_gen_prologue[]; +extern uint8_t *code_gen_prologue; /* TCG targets may use a different definition of tcg_qemu_tb_exec. */ #if !defined(tcg_qemu_tb_exec) diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 6d894953aa..37f28c0522 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -102,9 +102,6 @@ #define TCG_TARGET_HAS_movcond_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ -/* Offset to user memory in user mode. */ -#define TCG_TARGET_HAS_GUEST_BASE - /* Number of registers available. For 32 bit hosts, we need more than 8 registers (call arguments). */ /* #define TCG_TARGET_NB_REGS 8 */ |