about summary refs log tree commit diff
path: root/tcg/i386/tcg-target.c.inc
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/i386/tcg-target.c.inc')
-rw-r--r-- tcg/i386/tcg-target.c.inc | 223
1 files changed, 100 insertions, 123 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index af71a397b1..278e567b56 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3029,6 +3029,92 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
#undef OP_32_64
}
+static int const umin_insn[4] = { /* unsigned-minimum opcodes, indexed by vece */
+ OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ /* last entry: 64-bit form */
+};
+
+static int const umax_insn[4] = { /* unsigned-maximum opcodes, indexed by vece */
+ OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ /* last entry: 64-bit form */
+};
+/*
+ * Emit a vector comparison of v1 against v2 into v0 using only the
+ * PCMPEQ and PCMPGT opcode families, optionally preceded by an unsigned
+ * min/max that goes through TCG_TMP_VEC.
+ *
+ * vece indexes the four-entry opcode tables; type is passed through
+ * unchanged to tcg_out_vex_modrm_type().
+ *
+ * Returns nonzero when NEED_INV is set for cond, i.e. the emitted
+ * sequence computes the inverse of the requested condition and the
+ * caller still has to invert v0.  Conditions that have no entry in
+ * cond_fixup[] trip the assert below.
+ */
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ static int const cmpeq_insn[4] = {
+ OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+ };
+ static int const cmpgt_insn[4] = {
+ OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+ };
+ /* Flags describing how a condition is reduced to a plain EQ or GT. */
+ enum {
+ NEED_INV = 1, /* use the inverted condition; reported via the return value */
+ NEED_SWAP = 2, /* exchange v1 and v2 and swap the condition */
+ NEED_UMIN = 4, /* compare v1 for equality with umin(v1, v2) */
+ NEED_UMAX = 8, /* compare v1 for equality with umax(v1, v2) */
+ INVALID = 16, /* condition has no explicit table entry */
+ };
+ static const uint8_t cond_fixup[16] = {
+ [0 ... 15] = INVALID, /* default, overridden by the entries below */
+ [TCG_COND_EQ] = 0,
+ [TCG_COND_GT] = 0,
+ [TCG_COND_NE] = NEED_INV,
+ [TCG_COND_LE] = NEED_INV,
+ [TCG_COND_LT] = NEED_SWAP,
+ [TCG_COND_GE] = NEED_SWAP | NEED_INV,
+ [TCG_COND_LEU] = NEED_UMIN,
+ [TCG_COND_GTU] = NEED_UMIN | NEED_INV,
+ [TCG_COND_GEU] = NEED_UMAX,
+ [TCG_COND_LTU] = NEED_UMAX | NEED_INV,
+ };
+ int fixup = cond_fixup[cond];
+
+ assert(!(fixup & INVALID));
+ /* Work on the inverse condition; the caller is told through the return value. */
+ if (fixup & NEED_INV) {
+ cond = tcg_invert_cond(cond);
+ }
+ /* Exchange the two inputs and adjust cond to match. */
+ if (fixup & NEED_SWAP) {
+ TCGReg swap = v1;
+ v1 = v2;
+ v2 = swap;
+ cond = tcg_swap_cond(cond);
+ }
+ /* v1 <=u v2 iff umin(v1, v2) == v1; v1 >=u v2 iff umax(v1, v2) == v1. */
+ if (fixup & (NEED_UMIN | NEED_UMAX)) {
+ int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]);
+
+ /* avx2 does not have 64-bit min/max; adjusted during expand. */
+ assert(vece <= MO_32);
+ /* TCG_TMP_VEC becomes v2 below, so it presumably receives the min/max result. */
+ tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type);
+ v2 = TCG_TMP_VEC;
+ cond = TCG_COND_EQ; /* replaces whatever cond was left by the inversion above */
+ }
+ /* Only EQ and GT are emitted directly; anything else is a logic error. */
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type);
+ break;
+ case TCG_COND_GT:
+ tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return fixup & NEED_INV; /* nonzero: caller must still invert v0 */
+}
+/*
+ * Emit a vector comparison for cond, leaving the result in v0.
+ *
+ * tcg_out_cmp_vec_noinv() emits the compare; when it reports that the
+ * inverse condition was emitted, v0 is XORed with the value that
+ * tcg_out_dupi_vec(..., -1) places in TCG_TMP_VEC (presumably all-ones,
+ * which would make the PXOR a bitwise NOT -- confirm against
+ * tcg_out_dupi_vec).  TCG_TMP_VEC is overwritten on that path.
+ */
+static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
+ tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
+ tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
+ }
+}
+
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3058,12 +3144,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static int const shift_imm_insn[4] = {
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
};
- static int const cmpeq_insn[4] = {
- OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
- };
- static int const cmpgt_insn[4] = {
- OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
- };
static int const punpckl_insn[4] = {
OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
};
@@ -3082,12 +3162,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static int const smax_insn[4] = {
OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
};
- static int const umin_insn[4] = {
- OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
- };
- static int const umax_insn[4] = {
- OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
- };
static int const rotlv_insn[4] = {
OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
};
@@ -3243,15 +3317,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_cmp_vec:
- sub = args[3];
- if (sub == TCG_COND_EQ) {
- insn = cmpeq_insn[vece];
- } else if (sub == TCG_COND_GT) {
- insn = cmpgt_insn[vece];
- } else {
- g_assert_not_reached();
- }
- goto gen_simd;
+ tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]);
+ break;
case INDEX_op_andc_vec:
insn = OPC_PANDN;
@@ -3971,88 +4038,19 @@ static void expand_vec_mul(TCGType type, unsigned vece,
}
}
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
- enum {
- NEED_INV = 1,
- NEED_SWAP = 2,
- NEED_BIAS = 4,
- NEED_UMIN = 8,
- NEED_UMAX = 16,
- };
- TCGv_vec t1, t2, t3;
- uint8_t fixup;
-
- switch (cond) {
- case TCG_COND_EQ:
- case TCG_COND_GT:
- fixup = 0;
- break;
- case TCG_COND_NE:
- case TCG_COND_LE:
- fixup = NEED_INV;
- break;
- case TCG_COND_LT:
- fixup = NEED_SWAP;
- break;
- case TCG_COND_GE:
- fixup = NEED_SWAP | NEED_INV;
- break;
- case TCG_COND_LEU:
- if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
- fixup = NEED_UMIN;
- } else {
- fixup = NEED_BIAS | NEED_INV;
- }
- break;
- case TCG_COND_GTU:
- if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
- fixup = NEED_UMIN | NEED_INV;
- } else {
- fixup = NEED_BIAS;
- }
- break;
- case TCG_COND_GEU:
- if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
- fixup = NEED_UMAX;
- } else {
- fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
- }
- break;
- case TCG_COND_LTU:
- if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
- fixup = NEED_UMAX | NEED_INV;
- } else {
- fixup = NEED_BIAS | NEED_SWAP;
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- if (fixup & NEED_INV) {
- cond = tcg_invert_cond(cond);
- }
- if (fixup & NEED_SWAP) {
- t1 = v1, v1 = v2, v2 = t1;
- cond = tcg_swap_cond(cond);
- }
+ /*
+ * Without AVX512, there are no 64-bit unsigned comparisons.
+ * We must bias the inputs so that they become signed.
+ * All other swapping and inversion are handled during code generation.
+ */
+ if (vece == MO_64 && is_unsigned_cond(cond)) {
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
- t1 = t2 = NULL;
- if (fixup & (NEED_UMIN | NEED_UMAX)) {
- t1 = tcg_temp_new_vec(type);
- if (fixup & NEED_UMIN) {
- tcg_gen_umin_vec(vece, t1, v1, v2);
- } else {
- tcg_gen_umax_vec(vece, t1, v1, v2);
- }
- v2 = t1;
- cond = TCG_COND_EQ;
- } else if (fixup & NEED_BIAS) {
- t1 = tcg_temp_new_vec(type);
- t2 = tcg_temp_new_vec(type);
- t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
tcg_gen_sub_vec(vece, t1, v1, t3);
tcg_gen_sub_vec(vece, t2, v2, t3);
v1 = t1;
@@ -4060,26 +4058,9 @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
cond = tcg_signed_cond(cond);
}
- tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
/* Expand directly; do not recurse. */
vec_gen_4(INDEX_op_cmp_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
- if (t1) {
- tcg_temp_free_vec(t1);
- if (t2) {
- tcg_temp_free_vec(t2);
- }
- }
- return fixup & NEED_INV;
-}
-
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
- if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
- tcg_gen_not_vec(vece, v0, v0);
- }
}
static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
@@ -4088,11 +4069,7 @@ static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
{
TCGv_vec t = tcg_temp_new_vec(type);
- if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
- /* Invert the sense of the compare by swapping arguments. */
- TCGv_vec x;
- x = v3, v3 = v4, v4 = x;
- }
+ expand_vec_cmp(type, vece, t, c1, c2, cond);
vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v4),
tcgv_vec_arg(v3), tcgv_vec_arg(t));