Diffstat (limited to 'tcg/i386/tcg-target.c.inc')
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 486 |
1 file changed, 311 insertions, 175 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 9a54ef7f8d..1bf50f1f62 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -133,6 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_I32    0x400
 #define TCG_CT_CONST_WSZ    0x800
 #define TCG_CT_CONST_TST    0x1000
+#define TCG_CT_CONST_ZERO   0x2000
 
 /* Registers used with L constraint, which are the first argument
    registers on x86_64, and two random call clobbered registers on
@@ -226,6 +227,9 @@ static bool tcg_target_const_match(int64_t val, int ct,
     if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+        return 1;
+    }
     return 0;
 }
 
@@ -409,6 +413,18 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_UD2         (0x0b | P_EXT)
 #define OPC_VPBLENDD    (0x02 | P_EXT3A | P_DATA16)
 #define OPC_VPBLENDVB   (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPBLENDMB   (0x66 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMW   (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPBLENDMD   (0x64 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMQ   (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPB      (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUB     (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPW      (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUW     (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPD      (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUD     (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPQ      (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUQ     (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPINSRB     (0x20 | P_EXT3A | P_DATA16)
 #define OPC_VPINSRW     (0xc4 | P_EXT | P_DATA16)
 #define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
@@ -417,6 +433,10 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
 #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
 #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPMOVM2B    (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2W    (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPMOVM2D    (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2Q    (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
 #define OPC_VPERMQ      (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
 #define OPC_VPERM2I128  (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
 #define OPC_VPROLVD     (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
@@ -442,6 +462,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_VPSRLVD     (0x45 | P_EXT38 | P_DATA16)
 #define OPC_VPSRLVQ     (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
 #define OPC_VPTERNLOGQ  (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMB    (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMW    (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMD    (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMQ    (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMB   (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMW   (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMD   (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMQ   (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
 #define OPC_VZEROUPPER  (0x77 | P_EXT)
 #define OPC_XCHG_ax_r32 (0x90)
 #define OPC_XCHG_EvGv   (0x87)
@@ -658,7 +686,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
 }
 
 static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
-                             int rm, int index)
+                             int rm, int index, int aaa, bool z)
 {
     /* The entire 4-byte evex prefix; with R' and V' set. */
     uint32_t p = 0x08041062;
@@ -695,7 +723,9 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
     p = deposit32(p, 16, 2, pp);
     p = deposit32(p, 19, 4, ~v);
     p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+    p = deposit32(p, 24, 3, aaa);
     p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+    p = deposit32(p, 31, 1, z);
 
     tcg_out32(s, p);
     tcg_out8(s, opc);
@@ -704,13 +734,32 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
 {
     if (opc & P_EVEX) {
-        tcg_out_evex_opc(s, opc, r, v, rm, 0);
+        tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false);
     } else {
         tcg_out_vex_opc(s, opc, r, v, rm, 0);
     }
     tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
 }
 
+static void tcg_out_vex_modrm_type(TCGContext *s, int opc,
+                                   int r, int v, int rm, TCGType type)
+{
+    if (type == TCG_TYPE_V256) {
+        opc |= P_VEXL;
+    }
+    tcg_out_vex_modrm(s, opc, r, v, rm);
+}
+
+static void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v,
+                                    int rm, int aaa, bool z, TCGType type)
+{
+    if (type == TCG_TYPE_V256) {
+        opc |= P_VEXL;
+    }
+    tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z);
+    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
    We handle either RM and INDEX missing with a negative value.  In 64-bit
    mode for absolute addresses, ~RM is the size of the immediate operand
@@ -904,8 +953,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg a)
 {
     if (have_avx2) {
-        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
-        tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
+        tcg_out_vex_modrm_type(s, avx2_dup_insn[vece], r, 0, a, type);
     } else {
         switch (vece) {
         case MO_8:
@@ -3021,6 +3069,214 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #undef OP_32_64
 }
 
+static int const umin_insn[4] = {
+    OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
+};
+
+static int const umax_insn[4] = {
+    OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
+};
+
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
+                                  TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    static int const cmpeq_insn[4] = {
+        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+    };
+    static int const cmpgt_insn[4] = {
+        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+    };
+
+    enum {
+        NEED_INV = 1,
+        NEED_SWAP = 2,
+        NEED_UMIN = 4,
+        NEED_UMAX = 8,
+        INVALID = 16,
+    };
+    static const uint8_t cond_fixup[16] = {
+        [0 ... 15] = INVALID,
+        [TCG_COND_EQ] = 0,
+        [TCG_COND_GT] = 0,
+        [TCG_COND_NE] = NEED_INV,
+        [TCG_COND_LE] = NEED_INV,
+        [TCG_COND_LT] = NEED_SWAP,
+        [TCG_COND_GE] = NEED_SWAP | NEED_INV,
+        [TCG_COND_LEU] = NEED_UMIN,
+        [TCG_COND_GTU] = NEED_UMIN | NEED_INV,
+        [TCG_COND_GEU] = NEED_UMAX,
+        [TCG_COND_LTU] = NEED_UMAX | NEED_INV,
+    };
+    int fixup = cond_fixup[cond];
+
+    assert(!(fixup & INVALID));
+
+    if (fixup & NEED_INV) {
+        cond = tcg_invert_cond(cond);
+    }
+
+    if (fixup & NEED_SWAP) {
+        TCGReg swap = v1;
+        v1 = v2;
+        v2 = swap;
+        cond = tcg_swap_cond(cond);
+    }
+
+    if (fixup & (NEED_UMIN | NEED_UMAX)) {
+        int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]);
+
+        /* avx2 does not have 64-bit min/max; adjusted during expand. */
+        assert(vece <= MO_32);
+
+        tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type);
+        v2 = TCG_TMP_VEC;
+        cond = TCG_COND_EQ;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type);
+        break;
+    case TCG_COND_GT:
+        tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return fixup & NEED_INV;
+}
+
+static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+                               TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    static const int cmpm_insn[2][4] = {
+        { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
+        { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
+    };
+    static const int testm_insn[4] = {
+        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
+    };
+    static const int testnm_insn[4] = {
+        OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ
+    };
+
+    static const int cond_ext[16] = {
+        [TCG_COND_EQ] = 0,
+        [TCG_COND_NE] = 4,
+        [TCG_COND_LT] = 1,
+        [TCG_COND_LTU] = 1,
+        [TCG_COND_LE] = 2,
+        [TCG_COND_LEU] = 2,
+        [TCG_COND_NEVER] = 3,
+        [TCG_COND_GE] = 5,
+        [TCG_COND_GEU] = 5,
+        [TCG_COND_GT] = 6,
+        [TCG_COND_GTU] = 6,
+        [TCG_COND_ALWAYS] = 7,
+    };
+
+    switch (cond) {
+    case TCG_COND_TSTNE:
+        tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    case TCG_COND_TSTEQ:
+        tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    default:
+        tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+                               /* k1 */ 1, v1, v2, type);
+        tcg_out8(s, cond_ext[cond]);
+        break;
+    }
+}
+
+static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
+                              unsigned vece, TCGReg dest)
+{
+    static const int movm_insn[] = {
+        OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q
+    };
+    tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    /*
+     * With avx512, we have a complete set of comparisons into mask.
+     * Unless there's a single insn expansion for the comparision,
+     * expand via a mask in k1.
+     */
+    if ((vece <= MO_16 ? have_avx512bw : have_avx512dq)
+        && cond != TCG_COND_EQ
+        && cond != TCG_COND_LT
+        && cond != TCG_COND_GT) {
+        tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
+        tcg_out_k1_to_vec(s, type, vece, v0);
+        return;
+    }
+
+    if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
+        tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
+        tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
+    }
+}
+
+static void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+                                  TCGReg v0, TCGReg c1, TCGReg c2,
+                                  TCGReg v3, TCGReg v4, TCGCond cond)
+{
+    static const int vpblendm_insn[] = {
+        OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ
+    };
+    bool z = false;
+
+    /* Swap to place constant in V4 to take advantage of zero-masking. */
+    if (!v3) {
+        z = true;
+        v3 = v4;
+        cond = tcg_invert_cond(cond);
+    }
+
+    tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond);
+    tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3,
+                            /* k1 */ 1, z, type);
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece,
+                               TCGReg v0, TCGReg c1, TCGReg c2,
+                               TCGReg v3, TCGReg v4, TCGCond cond)
+{
+    bool inv;
+
+    if (vece <= MO_16 ? have_avx512bw : have_avx512vl) {
+        tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond);
+        return;
+    }
+
+    inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond);
+
+    /*
+     * Since XMM0 is 16, the only way we get 0 into V3
+     * is via the constant zero constraint.
+     */
+    if (!v3) {
+        if (inv) {
+            tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type);
+        } else {
+            tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type);
+        }
+    } else {
+        if (inv) {
+            TCGReg swap = v3;
+            v3 = v4;
+            v4 = swap;
+        }
+        tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type);
+        tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
+    }
+}
+
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3050,12 +3306,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static int const shift_imm_insn[4] = {
         OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
     };
-    static int const cmpeq_insn[4] = {
-        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
-    };
-    static int const cmpgt_insn[4] = {
-        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
-    };
     static int const punpckl_insn[4] = {
         OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
     };
@@ -3074,12 +3324,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static int const smax_insn[4] = {
        OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
     };
-    static int const umin_insn[4] = {
-        OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
-    };
-    static int const umax_insn[4] = {
-        OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
-    };
     static int const rotlv_insn[4] = {
         OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
     };
@@ -3231,29 +3475,21 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         goto gen_simd;
     gen_simd:
         tcg_debug_assert(insn != OPC_UD2);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
+        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
         break;
 
     case INDEX_op_cmp_vec:
-        sub = args[3];
-        if (sub == TCG_COND_EQ) {
-            insn = cmpeq_insn[vece];
-        } else if (sub == TCG_COND_GT) {
-            insn = cmpgt_insn[vece];
-        } else {
-            g_assert_not_reached();
-        }
-        goto gen_simd;
+        tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]);
+        break;
+
+    case INDEX_op_cmpsel_vec:
+        tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2,
+                           args[3], args[4], args[5]);
+        break;
 
     case INDEX_op_andc_vec:
         insn = OPC_PANDN;
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a2, a1);
+        tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type);
         break;
 
@@ -3281,10 +3517,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         goto gen_shift;
     gen_shift:
         tcg_debug_assert(vece != MO_8);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, sub, a0, a1);
+        tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type);
         tcg_out8(s, a2);
         break;
 
@@ -3361,22 +3594,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
     gen_simd_imm8:
         tcg_debug_assert(insn != OPC_UD2);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
+        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
         tcg_out8(s, sub);
         break;
 
-    case INDEX_op_x86_vpblendvb_vec:
-        insn = OPC_VPBLENDVB;
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
-        tcg_out8(s, args[3] << 4);
-        break;
-
     case INDEX_op_x86_psrldq_vec:
         tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
         tcg_out8(s, a2);
@@ -3642,8 +3863,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I3(x, 0, x, x);
 
     case INDEX_op_bitsel_vec:
-    case INDEX_op_x86_vpblendvb_vec:
         return C_O1_I3(x, x, x, x);
+    case INDEX_op_cmpsel_vec:
+        return C_O1_I4(x, x, x, xO, x);
 
     default:
         g_assert_not_reached();
@@ -3979,145 +4201,59 @@ static void expand_vec_mul(TCGType type, unsigned vece,
     }
 }
 
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
-                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static TCGCond expand_vec_cond(TCGType type, unsigned vece,
+                               TCGArg *a1, TCGArg *a2, TCGCond cond)
 {
-    enum {
-        NEED_INV = 1,
-        NEED_SWAP = 2,
-        NEED_BIAS = 4,
-        NEED_UMIN = 8,
-        NEED_UMAX = 16,
-    };
-    TCGv_vec t1, t2, t3;
-    uint8_t fixup;
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_GT:
-        fixup = 0;
-        break;
-    case TCG_COND_NE:
-    case TCG_COND_LE:
-        fixup = NEED_INV;
-        break;
-    case TCG_COND_LT:
-        fixup = NEED_SWAP;
-        break;
-    case TCG_COND_GE:
-        fixup = NEED_SWAP | NEED_INV;
-        break;
-    case TCG_COND_LEU:
-        if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
-            fixup = NEED_UMIN;
-        } else {
-            fixup = NEED_BIAS | NEED_INV;
-        }
-        break;
-    case TCG_COND_GTU:
-        if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
-            fixup = NEED_UMIN | NEED_INV;
-        } else {
-            fixup = NEED_BIAS;
-        }
-        break;
-    case TCG_COND_GEU:
-        if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
-            fixup = NEED_UMAX;
-        } else {
-            fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
-        }
-        break;
-    case TCG_COND_LTU:
-        if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
-            fixup = NEED_UMAX | NEED_INV;
-        } else {
-            fixup = NEED_BIAS | NEED_SWAP;
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (fixup & NEED_INV) {
-        cond = tcg_invert_cond(cond);
-    }
-    if (fixup & NEED_SWAP) {
-        t1 = v1, v1 = v2, v2 = t1;
-        cond = tcg_swap_cond(cond);
-    }
+    /*
+     * Without AVX512, there are no 64-bit unsigned comparisons.
+     * We must bias the inputs so that they become signed.
+     * All other swapping and inversion are handled during code generation.
+     */
+    if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) {
+        TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1));
+        TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2));
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
+        TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
 
-    t1 = t2 = NULL;
-    if (fixup & (NEED_UMIN | NEED_UMAX)) {
-        t1 = tcg_temp_new_vec(type);
-        if (fixup & NEED_UMIN) {
-            tcg_gen_umin_vec(vece, t1, v1, v2);
-        } else {
-            tcg_gen_umax_vec(vece, t1, v1, v2);
-        }
-        v2 = t1;
-        cond = TCG_COND_EQ;
-    } else if (fixup & NEED_BIAS) {
-        t1 = tcg_temp_new_vec(type);
-        t2 = tcg_temp_new_vec(type);
-        t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
         tcg_gen_sub_vec(vece, t1, v1, t3);
         tcg_gen_sub_vec(vece, t2, v2, t3);
-        v1 = t1;
-        v2 = t2;
+        *a1 = tcgv_vec_arg(t1);
+        *a2 = tcgv_vec_arg(t2);
         cond = tcg_signed_cond(cond);
     }
-
-    tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
-    /* Expand directly; do not recurse. */
-    vec_gen_4(INDEX_op_cmp_vec, type, vece,
-              tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
-    if (t1) {
-        tcg_temp_free_vec(t1);
-        if (t2) {
-            tcg_temp_free_vec(t2);
-        }
-    }
-    return fixup & NEED_INV;
+    return cond;
 }
 
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
-                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0,
+                           TCGArg a1, TCGArg a2, TCGCond cond)
 {
-    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
-        tcg_gen_not_vec(vece, v0, v0);
-    }
+    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+    /* Expand directly; do not recurse. */
+    vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
 }
 
-static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
-                              TCGv_vec c1, TCGv_vec c2,
-                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0,
+                              TCGArg a1, TCGArg a2,
+                              TCGArg a3, TCGArg a4, TCGCond cond)
 {
-    TCGv_vec t = tcg_temp_new_vec(type);
-
-    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
-        /* Invert the sense of the compare by swapping arguments. */
-        TCGv_vec x;
-        x = v3, v3 = v4, v4 = x;
-    }
-    vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
-              tcgv_vec_arg(v0), tcgv_vec_arg(v4),
-              tcgv_vec_arg(v3), tcgv_vec_arg(t));
-    tcg_temp_free_vec(t);
+    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+    /* Expand directly; do not recurse. */
+    vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond);
 }
 
 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                        TCGArg a0, ...)
 {
     va_list va;
-    TCGArg a2;
-    TCGv_vec v0, v1, v2, v3, v4;
+    TCGArg a1, a2, a3, a4, a5;
+    TCGv_vec v0, v1, v2;
 
     va_start(va, a0);
-    v0 = temp_tcgv_vec(arg_temp(a0));
-    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+    a1 = va_arg(va, TCGArg);
     a2 = va_arg(va, TCGArg);
+    v0 = temp_tcgv_vec(arg_temp(a0));
+    v1 = temp_tcgv_vec(arg_temp(a1));
 
     switch (opc) {
     case INDEX_op_shli_vec:
@@ -4153,15 +4289,15 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
         break;
 
     case INDEX_op_cmp_vec:
-        v2 = temp_tcgv_vec(arg_temp(a2));
-        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+        a3 = va_arg(va, TCGArg);
+        expand_vec_cmp(type, vece, a0, a1, a2, a3);
        break;
 
     case INDEX_op_cmpsel_vec:
-        v2 = temp_tcgv_vec(arg_temp(a2));
-        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+        a3 = va_arg(va, TCGArg);
+        a4 = va_arg(va, TCGArg);
+        a5 = va_arg(va, TCGArg);
+        expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5);
         break;
 
     default:
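
A note for readers tracing the new cond_fixup table in tcg_out_cmp_vec_noinv() above: SSE/AVX2 provide only packed equality (PCMPEQ*) and signed greater-than (PCMPGT*) compares, so the unsigned orderings are rewritten through an unsigned min/max followed by an equality test, with the NEED_INV cases completed by the caller. A minimal standalone sketch of the identities being relied on; the helper names are illustrative and not from the patch:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * x <=u y  <=>  umin(x, y) == x   (TCG_COND_LEU: NEED_UMIN, then compare EQ)
 * x >=u y  <=>  umax(x, y) == x   (TCG_COND_GEU: NEED_UMAX, then compare EQ)
 * GTU and LTU are the complements, signalled via the NEED_INV return value.
 */
static bool leu_via_umin(uint32_t x, uint32_t y)
{
    uint32_t m = x < y ? x : y;   /* stand-in for PMINU* */
    return m == x;                /* stand-in for PCMPEQ* */
}

static bool geu_via_umax(uint32_t x, uint32_t y)
{
    uint32_t m = x > y ? x : y;   /* stand-in for PMAXU* */
    return m == x;
}

int main(void)
{
    /* Exhaustively check the identities on a small range. */
    for (uint32_t x = 0; x < 16; x++) {
        for (uint32_t y = 0; y < 16; y++) {
            assert(leu_via_umin(x, y) == (x <= y));
            assert(geu_via_umax(x, y) == (x >= y));
        }
    }
    return 0;
}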