diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-04-26 09:38:32 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-05-25 15:29:30 +0000 |
commit | f6453695f9f87ba1974eca13322864810c90b9f0 (patch) | |
tree | b57b8625e2e2ca284ef60b24d2fad4ddc63f6bef /tcg | |
parent | 0956ecda9fad9e81b8cbb1e5a05ae60bf6971f2d (diff) |
tcg/riscv: Improve setcond expansion
Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but instead returns the
register holding an intermediate result together with a set of flags
describing how to complete it.
Based on 21af16198425, the same improvement for loongarch64.
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/riscv/tcg-target.c.inc | 150 |
1 file changed, 114 insertions, 36 deletions
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 05ea9fead8..db328ddc2d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_opc_branch(s, op, arg1, arg2, 0); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) + +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) { + int flags = 0; + switch (cond) { - case TCG_COND_EQ: - tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); - break; - case TCG_COND_NE: - tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_GT: /* -> LE */ + case TCG_COND_GTU: /* -> LEU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + default: break; + } + + switch (cond) { case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + case TCG_COND_LEU: + /* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is constrained to signed 12-bit, and 0x800 is representable in the + * temporary register. 
+ */ + if (c2) { + if (cond == TCG_COND_LEU) { + /* unsigned <= -1 is true */ + if (arg2 == -1) { + tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); + return ret; + } + cond = TCG_COND_LTU; + } else { + cond = TCG_COND_LT; + } + tcg_debug_assert(arg2 <= 0x7ff); + if (++arg2 == 0x800) { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); + arg2 = TCG_REG_TMP0; + c2 = false; + } + } else { + TCGReg tmp = arg2; + arg2 = arg1; + arg1 = tmp; + cond = tcg_swap_cond(cond); /* LE -> GE */ + cond = tcg_invert_cond(cond); /* GE -> LT */ + flags ^= SETCOND_INV; + } break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + default: break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } + + switch (cond) { + case TCG_COND_NE: + flags |= SETCOND_NEZ; + if (!c2) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + } else if (arg2 == 0) { + ret = arg1; + } else { + tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2); + } break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + + case TCG_COND_LT: + if (c2) { + tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2); + } else { + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + } break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + + case TCG_COND_LTU: + if (c2) { + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2); + } else { + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + } break; + default: - g_assert_not_reached(); - break; - } + g_assert_not_reached(); + } + + return ret | flags; +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. 
*/ + tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. */ + tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } } static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) @@ -1542,7 +1620,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2); + tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; case INDEX_op_qemu_ld_a32_i32: @@ -1665,6 +1743,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_and_i64: case INDEX_op_or_i64: case INDEX_op_xor_i64: + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: return C_O1_I2(r, r, rI); case INDEX_op_andc_i32: @@ -1686,7 +1766,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_setcond_i32: case INDEX_op_mul_i64: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i64: @@ -1694,7 +1773,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: - case INDEX_op_setcond_i64: return C_O1_I2(r, rZ, rZ); case INDEX_op_shl_i32: |