author     Peter Maydell <peter.maydell@linaro.org>   2024-09-27 10:32:29 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2024-09-27 10:32:29 +0100
commit     4ae7d11b70a840eec7aa27269093b15d04ebc84e (patch)
tree       c3b2612319a7b3e392d38fd0f97ed40cb1f0747d /tcg
parent     e10cd93872c31332b002c933a798ab0bc51705a4 (diff)
parent     c4d80fa63e823dc8dbf094b29e39b6978a3073b6 (diff)
Merge tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu into staging
target/ppc: Fix lxvx/stxvx facility check
linux-user: update syscall_nr.h to Linux v6.10
linux-user: update syscall.tbl to Linux v6.11
tcg: Fix iteration step in 32-bit gvec operation
tcg: Propagate new TCGOp to add_as_label_use
tcg/*: Do not expand cmp_vec, cmpsel_vec early
tcg/optimize: Fold movcond with true and false values identical
tcg/optimize: Optimize cmp_vec and cmpsel_vec
tcg/optimize: Optimize bitsel_vec
tcg/i386: Optimize cmpsel with constant 0 operand 3.
tcg/i386: Implement cmp_vec with avx512 insns
tcg/i386: Implement cmpsel_vec with avx512 insns
tcg/i386: Implement vector TST{EQ,NE} for avx512
tcg/ppc: Implement cmpsel_vec and optimize with constant 0/-1 arguments
tcg/s390x: Implement cmpsel_vec and optimize with constant 0/-1 arguments
# -----BEGIN PGP SIGNATURE-----
#
# iQFQBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmbwBsIdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/bzwf40V8fsRhfc8u/k2Xw
# +bXyfyX7ydgB+82YoO71+Wy15ntmYmbL/6O9kGuJXWX8HRDrAR8Js9zDgveysw5m
# m/EG+XAbVjYhjaoaaz2nfr+5auQoos9NoCji3s3UJln6sCuZ2Enl0DTFluoHZfgr
# /YVAghJ4dwH0hfzO9kmsKmJ9I19HugMMN4dNvEcjQyDDUhgGGjkUaRtGNxwuiwuf
# ArL2XhFauXgloryDmpUKQ0BJJ0t8rdPlaRJ8dHqJUnJwav0TW3QVXbrs8gOkmmbg
# 9plYTC9DlTguQobvapCDVjiGy/6yuSF/AUpcdQbTRtTi3BrzhTydfLlOtI64GtaQ
# OBKd
# =ID/g
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 22 Sep 2024 13:00:02 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu: (31 commits)
linux-user: update syscall.tbl to Linux v6.11
linux-user,loongarch: move to syscalltbl file
linux-user,hexagon: move to syscalltbl file
linux-user,riscv: move to syscalltbl file
linux-user,openrisc: move to syscalltbl file
linux-user,aarch64: move to syscalltbl file
linux-user: update syscall.tbl to Linux v6.10
linux-user, mips: update syscall-args-o32.c.inc to Linux v6.10
linux-user: update syscall_nr.h to Linux v6.10
target/ppc: Fix lxvx/stxvx facility check
tcg/s390x: Optimize cmpsel with constant 0/-1 arguments
tcg/s390x: Implement cmpsel_vec
tcg/ppc: Optimize cmpsel with constant 0/-1 arguments
tcg/ppc: Implement cmpsel_vec
tcg/i386: Implement vector TST{EQ,NE} for avx512
tcg/i386: Implement cmpsel_vec with avx512 insns
tcg/i386: Add predicate parameters to tcg_out_evex_opc
tcg/i386: Implement cmp_vec with avx512 insns
tcg/i386: Optimize cmpsel with constant 0 operand 3.
tcg/optimize: Optimize bitsel_vec
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r--  tcg/i386/tcg-target-con-set.h  |   1
-rw-r--r--  tcg/i386/tcg-target-con-str.h  |   1
-rw-r--r--  tcg/i386/tcg-target.c.inc      | 486
-rw-r--r--  tcg/i386/tcg-target.h          |   4
-rw-r--r--  tcg/i386/tcg-target.opc.h      |   1
-rw-r--r--  tcg/optimize.c                 |  99
-rw-r--r--  tcg/ppc/tcg-target-con-set.h   |   1
-rw-r--r--  tcg/ppc/tcg-target.c.inc       | 254
-rw-r--r--  tcg/ppc/tcg-target.h           |   2
-rw-r--r--  tcg/s390x/tcg-target-con-set.h |   2
-rw-r--r--  tcg/s390x/tcg-target-con-str.h |   1
-rw-r--r--  tcg/s390x/tcg-target.c.inc     | 195
-rw-r--r--  tcg/s390x/tcg-target.h         |   2
-rw-r--r--  tcg/tcg-internal.h             |  14
-rw-r--r--  tcg/tcg-op-gvec.c              |   2
-rw-r--r--  tcg/tcg-op-vec.c               |   4
-rw-r--r--  tcg/tcg-op.c                   |  86
17 files changed, 748 insertions, 407 deletions
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index e24241cfa2..06e6521001 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -50,6 +50,7 @@ C_N1_I2(r, r, r)
 C_N1_I2(r, r, rW)
 C_O1_I3(x, 0, x, x)
 C_O1_I3(x, x, x, x)
+C_O1_I4(x, x, x, xO, x)
 C_O1_I4(r, r, reT, r, 0)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, L)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
index cc22db227b..52142ab121 100644
--- a/tcg/i386/tcg-target-con-str.h
+++ b/tcg/i386/tcg-target-con-str.h
@@ -28,6 +28,7 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */
  */
 CONST('e', TCG_CT_CONST_S32)
 CONST('I', TCG_CT_CONST_I32)
+CONST('O', TCG_CT_CONST_ZERO)
 CONST('T', TCG_CT_CONST_TST)
 CONST('W', TCG_CT_CONST_WSZ)
 CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 9a54ef7f8d..1bf50f1f62 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -133,6 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_I32 0x400
 #define TCG_CT_CONST_WSZ 0x800
 #define TCG_CT_CONST_TST 0x1000
+#define TCG_CT_CONST_ZERO 0x2000
 
 /* Registers used with L constraint, which are the first argument
    registers on x86_64, and two random call clobbered registers on
@@ -226,6 +227,9 @@ static bool tcg_target_const_match(int64_t val, int ct,
     if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+        return 1;
+    }
     return 0;
 }
 
@@ -409,6 +413,18 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_UD2 (0x0b | P_EXT)
 #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
 #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPBLENDMB (0x66 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMW (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPBLENDMD (0x64 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMQ (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPB (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUB (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPW (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUW (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPD (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUD (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPQ (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUQ (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
 #define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
 #define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
@@ -417,6 +433,10 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
 #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
 #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPMOVM2B (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2W (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPMOVM2D (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2Q (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
 #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
 #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
 #define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
@@ -442,6 +462,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
 #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
 #define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
 #define OPC_VZEROUPPER (0x77 | P_EXT)
 #define OPC_XCHG_ax_r32 (0x90)
 #define OPC_XCHG_EvGv (0x87)
@@ -658,7 +686,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
 }
 
 static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
-                             int rm, int index)
+                             int rm, int index, int aaa, bool z)
 {
     /* The entire 4-byte evex prefix; with R' and V' set. */
     uint32_t p = 0x08041062;
@@ -695,7 +723,9 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
     p = deposit32(p, 16, 2, pp);
     p = deposit32(p, 19, 4, ~v);
     p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+    p = deposit32(p, 24, 3, aaa);
     p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+    p = deposit32(p, 31, 1, z);
 
     tcg_out32(s, p);
     tcg_out8(s, opc);
@@ -704,13 +734,32 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
 {
     if (opc & P_EVEX) {
-        tcg_out_evex_opc(s, opc, r, v, rm, 0);
+        tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false);
     } else {
         tcg_out_vex_opc(s, opc, r, v, rm, 0);
     }
     tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
 }
 
+static void tcg_out_vex_modrm_type(TCGContext *s, int opc,
+                                   int r, int v, int rm, TCGType type)
+{
+    if (type == TCG_TYPE_V256) {
+        opc |= P_VEXL;
+    }
+    tcg_out_vex_modrm(s, opc, r, v, rm);
+}
+
+static void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v,
+                                    int rm, int aaa, bool z, TCGType type)
+{
+    if (type == TCG_TYPE_V256) {
+        opc |= P_VEXL;
+    }
+    tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z);
+    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
 /* Output an opcode with a full "rm + (index<<shift) + offset" address
    mode.  We handle either RM and INDEX missing with a negative value.
    In 64-bit mode for absolute addresses, ~RM is the size of the immediate operand
@@ -904,8 +953,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg a)
 {
     if (have_avx2) {
-        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
-        tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
+        tcg_out_vex_modrm_type(s, avx2_dup_insn[vece], r, 0, a, type);
     } else {
         switch (vece) {
         case MO_8:
@@ -3021,6 +3069,214 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #undef OP_32_64
 }
 
+static int const umin_insn[4] = {
+    OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
+};
+
+static int const umax_insn[4] = {
+    OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
+};
+
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
+                                  TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    static int const cmpeq_insn[4] = {
+        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+    };
+    static int const cmpgt_insn[4] = {
+        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+    };
+
+    enum {
+        NEED_INV = 1,
+        NEED_SWAP = 2,
+        NEED_UMIN = 4,
+        NEED_UMAX = 8,
+        INVALID = 16,
+    };
+    static const uint8_t cond_fixup[16] = {
+        [0 ... 15] = INVALID,
+        [TCG_COND_EQ] = 0,
+        [TCG_COND_GT] = 0,
+        [TCG_COND_NE] = NEED_INV,
+        [TCG_COND_LE] = NEED_INV,
+        [TCG_COND_LT] = NEED_SWAP,
+        [TCG_COND_GE] = NEED_SWAP | NEED_INV,
+        [TCG_COND_LEU] = NEED_UMIN,
+        [TCG_COND_GTU] = NEED_UMIN | NEED_INV,
+        [TCG_COND_GEU] = NEED_UMAX,
+        [TCG_COND_LTU] = NEED_UMAX | NEED_INV,
+    };
+    int fixup = cond_fixup[cond];
+
+    assert(!(fixup & INVALID));
+
+    if (fixup & NEED_INV) {
+        cond = tcg_invert_cond(cond);
+    }
+
+    if (fixup & NEED_SWAP) {
+        TCGReg swap = v1;
+        v1 = v2;
+        v2 = swap;
+        cond = tcg_swap_cond(cond);
+    }
+
+    if (fixup & (NEED_UMIN | NEED_UMAX)) {
+        int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]);
+
+        /* avx2 does not have 64-bit min/max; adjusted during expand. */
+        assert(vece <= MO_32);
+
+        tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type);
+        v2 = TCG_TMP_VEC;
+        cond = TCG_COND_EQ;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type);
+        break;
+    case TCG_COND_GT:
+        tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return fixup & NEED_INV;
+}
+
+static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+                               TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    static const int cmpm_insn[2][4] = {
+        { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
+        { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
+    };
+    static const int testm_insn[4] = {
+        OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
+    };
+    static const int testnm_insn[4] = {
+        OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ
+    };
+
+    static const int cond_ext[16] = {
+        [TCG_COND_EQ] = 0,
+        [TCG_COND_NE] = 4,
+        [TCG_COND_LT] = 1,
+        [TCG_COND_LTU] = 1,
+        [TCG_COND_LE] = 2,
+        [TCG_COND_LEU] = 2,
+        [TCG_COND_NEVER] = 3,
+        [TCG_COND_GE] = 5,
+        [TCG_COND_GEU] = 5,
+        [TCG_COND_GT] = 6,
+        [TCG_COND_GTU] = 6,
+        [TCG_COND_ALWAYS] = 7,
+    };
+
+    switch (cond) {
+    case TCG_COND_TSTNE:
+        tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    case TCG_COND_TSTEQ:
+        tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
+        break;
+    default:
+        tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+                               /* k1 */ 1, v1, v2, type);
+        tcg_out8(s, cond_ext[cond]);
+        break;
+    }
+}
+
+static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
+                              unsigned vece, TCGReg dest)
+{
+    static const int movm_insn[] = {
+        OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q
+    };
+    tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+    /*
+     * With avx512, we have a complete set of comparisons into mask.
+     * Unless there's a single insn expansion for the comparision,
+     * expand via a mask in k1.
+     */
+    if ((vece <= MO_16 ? have_avx512bw : have_avx512dq)
+        && cond != TCG_COND_EQ
+        && cond != TCG_COND_LT
+        && cond != TCG_COND_GT) {
+        tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
+        tcg_out_k1_to_vec(s, type, vece, v0);
+        return;
+    }
+
+    if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
+        tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
+        tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
+    }
+}
+
+static void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+                                  TCGReg v0, TCGReg c1, TCGReg c2,
+                                  TCGReg v3, TCGReg v4, TCGCond cond)
+{
+    static const int vpblendm_insn[] = {
+        OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ
+    };
+    bool z = false;
+
+    /* Swap to place constant in V4 to take advantage of zero-masking. */
+    if (!v3) {
+        z = true;
+        v3 = v4;
+        cond = tcg_invert_cond(cond);
+    }
+
+    tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond);
+    tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3,
+                            /* k1 */1, z, type);
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece,
+                               TCGReg v0, TCGReg c1, TCGReg c2,
+                               TCGReg v3, TCGReg v4, TCGCond cond)
+{
+    bool inv;
+
+    if (vece <= MO_16 ? have_avx512bw : have_avx512vl) {
+        tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond);
+        return;
+    }
+
+    inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond);
+
+    /*
+     * Since XMM0 is 16, the only way we get 0 into V3
+     * is via the constant zero constraint.
+     */
+    if (!v3) {
+        if (inv) {
+            tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type);
+        } else {
+            tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type);
+        }
+    } else {
+        if (inv) {
+            TCGReg swap = v3;
+            v3 = v4;
+            v4 = swap;
+        }
+        tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type);
+        tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
+    }
+}
+
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3050,12 +3306,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static int const shift_imm_insn[4] = {
         OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
     };
-    static int const cmpeq_insn[4] = {
-        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
-    };
-    static int const cmpgt_insn[4] = {
-        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
-    };
     static int const punpckl_insn[4] = {
         OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
     };
@@ -3074,12 +3324,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static int const smax_insn[4] = {
         OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
     };
-    static int const umin_insn[4] = {
-        OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
-    };
-    static int const umax_insn[4] = {
-        OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
-    };
     static int const rotlv_insn[4] = {
         OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
     };
@@ -3231,29 +3475,21 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         goto gen_simd;
     gen_simd:
         tcg_debug_assert(insn != OPC_UD2);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
+        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
         break;
 
     case INDEX_op_cmp_vec:
-        sub = args[3];
-        if (sub == TCG_COND_EQ) {
-            insn = cmpeq_insn[vece];
-        } else if (sub == TCG_COND_GT) {
-            insn = cmpgt_insn[vece];
-        } else {
-            g_assert_not_reached();
-        }
-        goto gen_simd;
+        tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]);
+        break;
+
+    case INDEX_op_cmpsel_vec:
+        tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2,
+                           args[3], args[4], args[5]);
+        break;
 
     case INDEX_op_andc_vec:
         insn = OPC_PANDN;
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a2, a1);
+        tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type);
         break;
 
     case INDEX_op_shli_vec:
@@ -3281,10 +3517,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         goto gen_shift;
     gen_shift:
         tcg_debug_assert(vece != MO_8);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, sub, a0, a1);
+        tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type);
         tcg_out8(s, a2);
         break;
 
@@ -3361,22 +3594,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
     gen_simd_imm8:
         tcg_debug_assert(insn != OPC_UD2);
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
+        tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
        tcg_out8(s, sub);
        break;
 
-    case INDEX_op_x86_vpblendvb_vec:
-        insn = OPC_VPBLENDVB;
-        if (type == TCG_TYPE_V256) {
-            insn |= P_VEXL;
-        }
-        tcg_out_vex_modrm(s, insn, a0, a1, a2);
-        tcg_out8(s, args[3] << 4);
-        break;
-
     case INDEX_op_x86_psrldq_vec:
         tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
         tcg_out8(s, a2);
@@ -3642,8 +3863,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I3(x, 0, x, x);
 
     case INDEX_op_bitsel_vec:
-    case INDEX_op_x86_vpblendvb_vec:
         return C_O1_I3(x, x, x, x);
+    case INDEX_op_cmpsel_vec:
+        return C_O1_I4(x, x, x, xO, x);
 
     default:
         g_assert_not_reached();
@@ -3979,145 +4201,59 @@ static void expand_vec_mul(TCGType type, unsigned vece,
     }
 }
 
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
-                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static TCGCond expand_vec_cond(TCGType type, unsigned vece,
+                               TCGArg *a1, TCGArg *a2, TCGCond cond)
 {
-    enum {
-        NEED_INV = 1,
-        NEED_SWAP = 2,
-        NEED_BIAS = 4,
-        NEED_UMIN = 8,
-        NEED_UMAX = 16,
-    };
-    TCGv_vec t1, t2, t3;
-    uint8_t fixup;
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_GT:
-        fixup = 0;
-        break;
-    case TCG_COND_NE:
-    case TCG_COND_LE:
-        fixup = NEED_INV;
-        break;
-    case TCG_COND_LT:
-        fixup = NEED_SWAP;
-        break;
-    case TCG_COND_GE:
-        fixup = NEED_SWAP | NEED_INV;
-        break;
-    case TCG_COND_LEU:
-        if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
-            fixup = NEED_UMIN;
-        } else {
-            fixup = NEED_BIAS | NEED_INV;
-        }
-        break;
-    case TCG_COND_GTU:
-        if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
-            fixup = NEED_UMIN | NEED_INV;
-        } else {
-            fixup = NEED_BIAS;
-        }
-        break;
-    case TCG_COND_GEU:
-        if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
-            fixup = NEED_UMAX;
-        } else {
-            fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
-        }
-        break;
-    case TCG_COND_LTU:
-        if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
-            fixup = NEED_UMAX | NEED_INV;
-        } else {
-            fixup = NEED_BIAS | NEED_SWAP;
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (fixup & NEED_INV) {
-        cond = tcg_invert_cond(cond);
-    }
-    if (fixup & NEED_SWAP) {
-        t1 = v1, v1 = v2, v2 = t1;
-        cond = tcg_swap_cond(cond);
-    }
+    /*
+     * Without AVX512, there are no 64-bit unsigned comparisons.
+     * We must bias the inputs so that they become signed.
+     * All other swapping and inversion are handled during code generation.
+     */
+    if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) {
+        TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1));
+        TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2));
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
+        TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
+
-    t1 = t2 = NULL;
-    if (fixup & (NEED_UMIN | NEED_UMAX)) {
-        t1 = tcg_temp_new_vec(type);
-        if (fixup & NEED_UMIN) {
-            tcg_gen_umin_vec(vece, t1, v1, v2);
-        } else {
-            tcg_gen_umax_vec(vece, t1, v1, v2);
-        }
-        v2 = t1;
-        cond = TCG_COND_EQ;
-    } else if (fixup & NEED_BIAS) {
-        t1 = tcg_temp_new_vec(type);
-        t2 = tcg_temp_new_vec(type);
-        t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
         tcg_gen_sub_vec(vece, t1, v1, t3);
         tcg_gen_sub_vec(vece, t2, v2, t3);
-        v1 = t1;
-        v2 = t2;
+        *a1 = tcgv_vec_arg(t1);
+        *a2 = tcgv_vec_arg(t2);
         cond = tcg_signed_cond(cond);
     }
-
-    tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
-    /* Expand directly; do not recurse. */
-    vec_gen_4(INDEX_op_cmp_vec, type, vece,
-              tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
-    if (t1) {
-        tcg_temp_free_vec(t1);
-        if (t2) {
-            tcg_temp_free_vec(t2);
-        }
-    }
-    return fixup & NEED_INV;
+    return cond;
 }
 
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
-                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0,
+                           TCGArg a1, TCGArg a2, TCGCond cond)
 {
-    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
-        tcg_gen_not_vec(vece, v0, v0);
-    }
+    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+    /* Expand directly; do not recurse. */
+    vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
 }
 
-static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
-                              TCGv_vec c1, TCGv_vec c2,
-                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0,
+                              TCGArg a1, TCGArg a2,
+                              TCGArg a3, TCGArg a4, TCGCond cond)
 {
-    TCGv_vec t = tcg_temp_new_vec(type);
-
-    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
-        /* Invert the sense of the compare by swapping arguments. */
-        TCGv_vec x;
-        x = v3, v3 = v4, v4 = x;
-    }
-    vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
-              tcgv_vec_arg(v0), tcgv_vec_arg(v4),
-              tcgv_vec_arg(v3), tcgv_vec_arg(t));
-    tcg_temp_free_vec(t);
+    cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+    /* Expand directly; do not recurse. */
+    vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond);
 }
 
 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                        TCGArg a0, ...)
 {
     va_list va;
-    TCGArg a2;
-    TCGv_vec v0, v1, v2, v3, v4;
+    TCGArg a1, a2, a3, a4, a5;
+    TCGv_vec v0, v1, v2;
 
     va_start(va, a0);
-    v0 = temp_tcgv_vec(arg_temp(a0));
-    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+    a1 = va_arg(va, TCGArg);
     a2 = va_arg(va, TCGArg);
+    v0 = temp_tcgv_vec(arg_temp(a0));
+    v1 = temp_tcgv_vec(arg_temp(a1));
 
     switch (opc) {
     case INDEX_op_shli_vec:
@@ -4153,15 +4289,15 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
         break;
 
     case INDEX_op_cmp_vec:
-        v2 = temp_tcgv_vec(arg_temp(a2));
-        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+        a3 = va_arg(va, TCGArg);
+        expand_vec_cmp(type, vece, a0, a1, a2, a3);
         break;
 
     case INDEX_op_cmpsel_vec:
-        v2 = temp_tcgv_vec(arg_temp(a2));
-        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+        a3 = va_arg(va, TCGArg);
+        a4 = va_arg(va, TCGArg);
+        a5 = va_arg(va, TCGArg);
+        expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5);
         break;
 
     default:
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 2f67a97e05..c68ac023d8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -223,8 +223,8 @@ typedef enum {
 #define TCG_TARGET_HAS_sat_vec 1
 #define TCG_TARGET_HAS_minmax_vec 1
 #define TCG_TARGET_HAS_bitsel_vec have_avx512vl
-#define TCG_TARGET_HAS_cmpsel_vec -1
-#define TCG_TARGET_HAS_tst_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec have_avx512bw
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
index b5f403e35e..4ffc084bda 100644
--- a/tcg/i386/tcg-target.opc.h
+++ b/tcg/i386/tcg-target.opc.h
@@ -25,7 +25,6 @@
  */
 
 DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
-DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
 DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
 DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
 DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index ba16ec27e2..e9ef16b3c6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1851,6 +1851,11 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
 {
     int i;
 
+    /* If true and false values are the same, eliminate the cmp. */
+    if (args_are_copies(op->args[3], op->args[4])) {
+        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+    }
+
     /*
      * Canonicalize the "false" input reg to match the destination reg so
      * that the tcg backend can implement a "move if true" operation.
@@ -2417,6 +2422,36 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 }
 
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
+{
+    /* Canonicalize the comparison to put immediate second. */
+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[3] = tcg_swap_cond(op->args[3]);
+    }
+    return false;
+}
+
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
+{
+    /* If true and false values are the same, eliminate the cmp. */
+    if (args_are_copies(op->args[3], op->args[4])) {
+        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+    }
+
+    /* Canonicalize the comparison to put immediate second. */
+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[5] = tcg_swap_cond(op->args[5]);
+    }
+    /*
+     * Canonicalize the "false" input reg to match the destination,
+     * so that the tcg backend can implement "move if true".
+     */
+    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+        op->args[5] = tcg_invert_cond(op->args[5]);
+    }
+    return false;
+}
+
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask, s_mask, s_mask_old;
@@ -2702,6 +2737,61 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
     return fold_masks(ctx, op);
 }
 
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
+{
+    /* If true and false values are the same, eliminate the cmp. */
+    if (args_are_copies(op->args[2], op->args[3])) {
+        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+    }
+
+    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+        uint64_t tv = arg_info(op->args[2])->val;
+        uint64_t fv = arg_info(op->args[3])->val;
+
+        if (tv == -1 && fv == 0) {
+            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+        }
+        if (tv == 0 && fv == -1) {
+            if (TCG_TARGET_HAS_not_vec) {
+                op->opc = INDEX_op_not_vec;
+                return fold_not(ctx, op);
+            } else {
+                op->opc = INDEX_op_xor_vec;
+                op->args[2] = arg_new_constant(ctx, -1);
+                return fold_xor(ctx, op);
+            }
+        }
+    }
+    if (arg_is_const(op->args[2])) {
+        uint64_t tv = arg_info(op->args[2])->val;
+        if (tv == -1) {
+            op->opc = INDEX_op_or_vec;
+            op->args[2] = op->args[3];
+            return fold_or(ctx, op);
+        }
+        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
+            op->opc = INDEX_op_andc_vec;
+            op->args[2] = op->args[1];
+            op->args[1] = op->args[3];
+            return fold_andc(ctx, op);
+        }
+    }
+    if (arg_is_const(op->args[3])) {
+        uint64_t fv = arg_info(op->args[3])->val;
+        if (fv == 0) {
+            op->opc = INDEX_op_and_vec;
+            return fold_and(ctx, op);
+        }
+        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
+            op->opc = INDEX_op_orc_vec;
+            op->args[2] = op->args[1];
+            op->args[1] = op->args[3];
+            return fold_orc(ctx, op);
+        }
+    }
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 void tcg_optimize(TCGContext *s)
 {
@@ -2923,6 +3013,15 @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_setcond2_i32:
             done = fold_setcond2(&ctx, op);
             break;
+        case INDEX_op_cmp_vec:
+            done = fold_cmp_vec(&ctx, op);
+            break;
+        case INDEX_op_cmpsel_vec:
+            done = fold_cmpsel_vec(&ctx, op);
+            break;
+        case INDEX_op_bitsel_vec:
+            done = fold_bitsel_vec(&ctx, op);
+            break;
         CASE_OP_32_64(sextract):
             done = fold_sextract(&ctx, op);
             break;
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index 9f99bde505..453abde6c1 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -33,6 +33,7 @@ C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rZW)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
+C_O1_I4(v, v, v, vZM, v)
 C_O1_I4(r, r, rC, rZ, rZ)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, r)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 3553a47ba9..3f413ce3c1 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3567,12 +3567,14 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
         return vece <= MO_32;
-    case INDEX_op_cmp_vec:
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
     case INDEX_op_rotli_vec:
         return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+    case INDEX_op_cmp_vec:
+    case INDEX_op_cmpsel_vec:
+        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
     case INDEX_op_neg_vec:
         return vece >= MO_32 && have_isa_3_00;
     case INDEX_op_mul_vec:
@@ -3713,6 +3715,149 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
     return true;
 }
 
+static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
+{
+    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
+}
+
+static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
+                               TCGReg c, TCGReg t, TCGReg f)
+{
+    if (TCG_TARGET_HAS_bitsel_vec) {
+        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
+    } else {
+        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
+        tcg_out_andc_vec(s, d, f, c);
+        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
+    }
+}
+
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
+                                  TCGReg a1, TCGReg a2, TCGCond cond)
+{
+    static const uint32_t
+        eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+        ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
+        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
+    uint32_t insn;
+
+    bool need_swap = false, need_inv = false;
+
+    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        break;
+    case TCG_COND_NE:
+        if (have_isa_3_00 && vece <= MO_32) {
+            break;
+        }
+        /* fall through */
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        need_inv = true;
+        break;
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        need_swap = true;
+        break;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        need_swap = need_inv = true;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (need_inv) {
+        cond = tcg_invert_cond(cond);
+    }
+    if (need_swap) {
+        TCGReg swap = a1;
+        a1 = a2;
+        a2 = swap;
+        cond = tcg_swap_cond(cond);
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        insn = eq_op[vece];
+        break;
+    case TCG_COND_NE:
+        insn = ne_op[vece];
+        break;
+    case TCG_COND_GT:
+        insn = gts_op[vece];
+        break;
+    case TCG_COND_GTU:
+        insn = gtu_op[vece];
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
+
+    return need_inv;
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
+                            TCGReg a1, TCGReg a2, TCGCond cond)
+{
+    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
+        tcg_out_not_vec(s, a0, a0);
+    }
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
+                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
+                               TCGReg v4, TCGCond cond)
+{
+    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
+
+    if (!const_v3) {
+        if (inv) {
+            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
+        } else {
+            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
+        }
+    } else if (v3) {
+        if (inv) {
+            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
+        } else {
+            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
+        }
+    } else {
+        if (inv) {
+            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
+        } else {
+            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
+        }
+    }
+}
+
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3723,10 +3868,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
         mul_op[4] = { 0, 0, VMULUWM, VMULLD },
         neg_op[4] = { 0, 0, VNEGW, VNEGD },
-        eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
-        ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
-        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
-        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
         usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
         sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
@@ -3808,24 +3949,23 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         insn = sarv_op[vece];
         break;
     case INDEX_op_and_vec:
-        insn = VAND;
-        break;
+        tcg_out_and_vec(s, a0, a1, a2);
+        return;
     case INDEX_op_or_vec:
-        insn = VOR;
-        break;
+        tcg_out_or_vec(s, a0, a1, a2);
+        return;
     case INDEX_op_xor_vec:
         insn = VXOR;
         break;
     case INDEX_op_andc_vec:
-        insn = VANDC;
-        break;
+        tcg_out_andc_vec(s, a0, a1, a2);
+        return;
     case INDEX_op_not_vec:
-        insn = VNOR;
-        a2 = a1;
-        break;
+        tcg_out_not_vec(s, a0, a1);
+        return;
     case INDEX_op_orc_vec:
-        insn = VORC;
-        break;
+        tcg_out_orc_vec(s, a0, a1, a2);
+        return;
     case INDEX_op_nand_vec:
         insn = VNAND;
         break;
@@ -3837,26 +3977,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_cmp_vec:
-        switch (args[3]) {
-        case TCG_COND_EQ:
-            insn = eq_op[vece];
-            break;
-        case TCG_COND_NE:
-            insn = ne_op[vece];
-            break;
-        case TCG_COND_GT:
-            insn = gts_op[vece];
-            break;
-        case TCG_COND_GTU:
-            insn = gtu_op[vece];
-            break;
-        default:
-            g_assert_not_reached();
-        }
-        break;
-
+        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
+        return;
+    case INDEX_op_cmpsel_vec:
+        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
+                           args[3], const_args[3], args[4], args[5]);
+        return;
     case INDEX_op_bitsel_vec:
-        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
         return;
 
     case INDEX_op_dup2_vec:
@@ -3921,56 +4049,6 @@ static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
               tcgv_vec_arg(v1), tcgv_vec_arg(t1));
 }
 
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
-                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
-    bool need_swap = false, need_inv = false;
-
-    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_GT:
-    case TCG_COND_GTU:
-        break;
-    case TCG_COND_NE:
-        if (have_isa_3_00 && vece <= MO_32) {
-            break;
-        }
-        /* fall through */
-    case TCG_COND_LE:
-    case TCG_COND_LEU:
-        need_inv = true;
-        break;
-    case TCG_COND_LT:
-    case TCG_COND_LTU:
-        need_swap = true;
-        break;
-    case TCG_COND_GE:
-    case TCG_COND_GEU:
-        need_swap = need_inv = true;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (need_inv) {
-        cond = tcg_invert_cond(cond);
-    }
-    if (need_swap) {
-        TCGv_vec t1;
-        t1 = v1, v1 = v2, v2 = t1;
-        cond = tcg_swap_cond(cond);
-    }
-
-    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
-              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
-    if (need_inv) {
-        tcg_gen_not_vec(vece, v0, v0);
-    }
-}
-
 static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                            TCGv_vec v1, TCGv_vec v2)
 {
@@ -4045,10 +4123,6 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
     case INDEX_op_rotli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
        break;
-    case INDEX_op_cmp_vec:
-        v2 = temp_tcgv_vec(arg_temp(a2));
-        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
-        break;
     case INDEX_op_mul_vec:
         v2 = temp_tcgv_vec(arg_temp(a2));
         expand_vec_mul(type, vece, v0, v1, v2);
@@ -4276,6 +4350,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_bitsel_vec:
     case INDEX_op_ppc_msum_vec:
         return C_O1_I3(v, v, v, v);
+    case INDEX_op_cmpsel_vec:
+        return C_O1_I4(v, v, v, vZM, v);
 
     default:
         g_assert_not_reached();
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index e154fb14df..0b2171d38c 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -172,7 +172,7 @@ typedef enum {
 #define TCG_TARGET_HAS_sat_vec 1
 #define TCG_TARGET_HAS_minmax_vec 1
 #define TCG_TARGET_HAS_bitsel_vec have_vsx
-#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
 #define TCG_TARGET_HAS_tst_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index f75955eaa8..370e4b1295 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -38,6 +38,8 @@ C_O1_I2(r, rZ, r)
 C_O1_I2(v, v, r)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
+C_O1_I4(v, v, v, vZ, v)
+C_O1_I4(v, v, v, vZM, v)
 C_O1_I4(r, r, ri, rI, r)
 C_O1_I4(r, r, rC, rI, r)
 C_O2_I1(o, m, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 745f6c0df5..3e574e0662 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -20,6 +20,7 @@ CONST('C', TCG_CT_CONST_CMP)
 CONST('I', TCG_CT_CONST_S16)
 CONST('J', TCG_CT_CONST_S32)
 CONST('K', TCG_CT_CONST_P32)
+CONST('M', TCG_CT_CONST_M1)
 CONST('N', TCG_CT_CONST_INV)
 CONST('R', TCG_CT_CONST_INVRISBG)
 CONST('U', TCG_CT_CONST_U32)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index ad587325fc..a5d57197a4 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -36,6 +36,7 @@
 #define TCG_CT_CONST_INV (1 << 13)
 #define TCG_CT_CONST_INVRISBG (1 << 14)
 #define TCG_CT_CONST_CMP (1 << 15)
+#define TCG_CT_CONST_M1 (1 << 16)
 
 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
 #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
@@ -46,6 +47,7 @@
 /* A scratch register that may be be used throughout the backend.  */
 #define TCG_TMP0 TCG_REG_R1
+#define TCG_VEC_TMP0 TCG_REG_V31
 
 #define TCG_GUEST_BASE_REG TCG_REG_R13
 
@@ -606,6 +608,9 @@ static bool tcg_target_const_match(int64_t val, int ct,
     if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
         return true;
     }
+    if ((ct & TCG_CT_CONST_M1) && val == -1) {
+        return true;
+    }
 
     if (ct & TCG_CT_CONST_INV) {
         val = ~val;
@@ -2841,6 +2846,94 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
     tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
 }
 
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
+                                  TCGReg a1, TCGReg a2, TCGCond cond)
+{
+    bool need_swap = false, need_inv = false;
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        break;
+    case TCG_COND_NE:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        need_inv = true;
+        break;
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        need_swap = true;
+        break;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        need_swap = need_inv = true;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (need_inv) {
+        cond = tcg_invert_cond(cond);
+    }
+    if (need_swap) {
+        TCGReg swap = a1;
+        a1 = a2;
+        a2 = swap;
+        cond = tcg_swap_cond(cond);
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
+        break;
+    case TCG_COND_GT:
+        tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return need_inv;
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
+                            TCGReg a1, TCGReg a2, TCGCond cond)
+{
+    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
+        tcg_out_insn(s, VRRc, VNO, a0, a0, a0, 0);
+    }
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
+                               TCGReg c1, TCGReg c2, TCGArg v3,
+                               int const_v3, TCGReg v4, TCGCond cond)
+{
+    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP0, c1, c2, cond);
+
+    if (!const_v3) {
+        if (inv) {
+            tcg_out_insn(s, VRRe, VSEL, a0, v4, v3, TCG_VEC_TMP0);
+        } else {
+            tcg_out_insn(s, VRRe, VSEL, a0, v3, v4, TCG_VEC_TMP0);
+        }
+    } else if (v3) {
+        if (inv) {
+            tcg_out_insn(s, VRRc, VOC, a0, v4, TCG_VEC_TMP0, 0);
+        } else {
+            tcg_out_insn(s, VRRc, VO, a0, v4, TCG_VEC_TMP0, 0);
+        }
+    } else {
+        if (inv) {
+            tcg_out_insn(s, VRRc, VN, a0, v4, TCG_VEC_TMP0, 0);
+        } else {
+            tcg_out_insn(s, VRRc, VNC, a0, v4, TCG_VEC_TMP0, 0);
+        }
+    }
+}
+
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg args[TCG_MAX_OP_ARGS],
@@ -2959,19 +3052,11 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_cmp_vec:
-        switch ((TCGCond)args[3]) {
-        case TCG_COND_EQ:
-            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
-            break;
-        case TCG_COND_GT:
-            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
-            break;
-        case TCG_COND_GTU:
-            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
-            break;
-        default:
-            g_assert_not_reached();
-        }
+        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
+        break;
+    case INDEX_op_cmpsel_vec:
+        tcg_out_cmpsel_vec(s, vece, a0, a1, a2, args[3], const_args[3],
+                           args[4], args[5]);
         break;
 
     case INDEX_op_s390_vuph_vec:
@@ -3024,9 +3109,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_umax_vec:
     case INDEX_op_umin_vec:
     case INDEX_op_xor_vec:
-        return 1;
     case INDEX_op_cmp_vec:
     case INDEX_op_cmpsel_vec:
+        return 1;
     case INDEX_op_rotrv_vec:
         return -1;
     case INDEX_op_mul_vec:
@@ -3039,71 +3124,6 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     }
 }
 
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
-                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
-    bool need_swap = false, need_inv = false;
-
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_GT:
-    case TCG_COND_GTU:
-        break;
-    case TCG_COND_NE:
-    case TCG_COND_LE:
-    case TCG_COND_LEU:
-        need_inv = true;
-        break;
-    case TCG_COND_LT:
-    case TCG_COND_LTU:
-        need_swap = true;
-        break;
-    case TCG_COND_GE:
-    case TCG_COND_GEU:
-        need_swap = need_inv = true;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (need_inv) {
-        cond = tcg_invert_cond(cond);
-    }
-    if (need_swap) {
-        TCGv_vec t1;
-        t1 = v1, v1 = v2, v2 = t1;
-        cond = tcg_swap_cond(cond);
-    }
-
-    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
-              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
-    return need_inv;
-}
-
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
-                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
-    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
-        tcg_gen_not_vec(vece, v0, v0);
-    }
-}
-
-static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
-                              TCGv_vec c1, TCGv_vec c2,
-                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
-{
-    TCGv_vec t = tcg_temp_new_vec(type);
-
-    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
-        /* Invert the sense of the compare by swapping arguments.  */
-        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
-    } else {
-        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
-    }
-    tcg_temp_free_vec(t);
-}
-
 static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
                            TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
 {
@@ -3145,7 +3165,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                        TCGArg a0, ...)
 {
     va_list va;
-    TCGv_vec v0, v1, v2, v3, v4, t0;
+    TCGv_vec v0, v1, v2, t0;
 
     va_start(va, a0);
     v0 = temp_tcgv_vec(arg_temp(a0));
@@ -3153,16 +3173,6 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
     v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
 
     switch (opc) {
-    case INDEX_op_cmp_vec:
-        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
-        break;
-
-    case INDEX_op_cmpsel_vec:
-        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
-        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
-        break;
-
     case INDEX_op_rotrv_vec:
         t0 = tcg_temp_new_vec(type);
         tcg_gen_neg_vec(vece, t0, v2);
@@ -3397,6 +3407,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(v, v, r);
     case INDEX_op_bitsel_vec:
         return C_O1_I3(v, v, v, v);
+    case INDEX_op_cmpsel_vec:
+        return (TCG_TARGET_HAS_orc_vec
+                ? C_O1_I4(v, v, v, vZM, v)
+                : C_O1_I4(v, v, v, vZ, v));
 
     default:
         g_assert_not_reached();
@@ -3521,6 +3535,7 @@ static void tcg_target_init(TCGContext *s)
 
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
     /* XXX many insns can't be used with R0, so we better avoid it for now */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 62ce9d792a..86aeca166f 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -162,7 +162,7 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_sat_vec 0
 #define TCG_TARGET_HAS_minmax_vec 1
 #define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
 #define TCG_TARGET_HAS_tst_vec 0
 
 /* used for function call generation */
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index 9b0d982f65..8099248076 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -92,15 +92,17 @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind);
  */
 TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
 
-void tcg_gen_op1(TCGOpcode, TCGArg);
-void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
-void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op1(TCGOpcode, TCGArg);
+TCGOp *tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
+TCGOp *tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
 
 void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
 void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
 void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
+void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
+               TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e);
 
 #endif /* TCG_INTERNAL_H */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 0308732d9b..78ee1ced80 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3939,7 +3939,7 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
         uint32_t i;
 
         tcg_gen_extrl_i64_i32(t1, c);
-        for (i = 0; i < oprsz; i += 8) {
+        for (i = 0; i < oprsz; i += 4) {
             tcg_gen_ld_i32(t0, tcg_env, aofs + i);
             tcg_gen_negsetcond_i32(cond, t0, t0, t1);
             tcg_gen_st_i32(t0, tcg_env, dofs + i);
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index 84af210bc0..d4bb4aee74 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -172,8 +172,8 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
     op->args[3] = c;
 }
 
-static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
-                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
+void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
+               TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
 {
     TCGOp *op = tcg_emit_op(opc, 6);
     TCGOP_VECL(op) = type - TCG_TYPE_V64;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index eff3728622..4a7e705367 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -37,38 +37,43 @@
  */
 #define NI __attribute__((noinline))
 
-void NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
+TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
 {
     TCGOp *op = tcg_emit_op(opc, 1);
     op->args[0] = a1;
+    return op;
 }
 
-void NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
+TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
 {
     TCGOp *op = tcg_emit_op(opc, 2);
     op->args[0] = a1;
     op->args[1] = a2;
+    return op;
 }
 
-void NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
+TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
 {
     TCGOp *op = tcg_emit_op(opc, 3);
     op->args[0] = a1;
     op->args[1] = a2;
     op->args[2] = a3;
+    return op;
 }
 
-void NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
+TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
+                       TCGArg a3, TCGArg a4)
 {
     TCGOp *op = tcg_emit_op(opc, 4);
     op->args[0] = a1;
     op->args[1] = a2;
     op->args[2] = a3;
     op->args[3] = a4;
+    return op;
 }
 
-void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
-                    TCGArg a4, TCGArg a5)
+TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
+                       TCGArg a3, TCGArg a4, TCGArg a5)
 {
     TCGOp *op = tcg_emit_op(opc, 5);
     op->args[0] = a1;
@@ -76,10 +81,11 @@ void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
     op->args[2] = a3;
     op->args[3] = a4;
     op->args[4] = a5;
+    return op;
 }
 
-void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
-                    TCGArg a4, TCGArg a5, TCGArg a6)
+TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+                       TCGArg a4, TCGArg a5, TCGArg a6)
 {
     TCGOp *op = tcg_emit_op(opc, 6);
     op->args[0] = a1;
@@ -88,6 +94,7 @@ void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
     op->args[3] = a4;
     op->args[4] = a5;
     op->args[5] = a6;
+    return op;
 }
 
 /*
@@ -110,9 +117,9 @@ static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
     tcg_gen_op1(opc, tcgv_i64_arg(a1));
 }
 
-static void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
+static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
 {
-    tcg_gen_op1(opc, a1);
+    return tcg_gen_op1(opc, a1);
 }
 
 static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
@@ -189,16 +196,16 @@ static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                 tcgv_i64_arg(a3), a4);
 }
 
-static void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                  TCGArg a3, TCGArg a4)
+static TCGOp * DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                     TCGArg a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
 }
 
-static void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                  TCGArg a3, TCGArg a4)
+static TCGOp * DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                     TCGArg a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
 }
 
 static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
@@ -263,12 +270,12 @@ static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
 }
 
-static void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                  TCGv_i32 a3, TCGv_i32 a4,
-                                  TCGArg a5, TCGArg a6)
+static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                     TCGv_i32 a3, TCGv_i32 a4,
+                                     TCGArg a5, TCGArg a6)
 {
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
+    return tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                       tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
 }
 
 /* Generic ops.  */
@@ -279,18 +286,17 @@ void gen_set_label(TCGLabel *l)
     tcg_gen_op1(INDEX_op_set_label, label_arg(l));
 }
 
-static void add_last_as_label_use(TCGLabel *l)
+static void add_as_label_use(TCGLabel *l, TCGOp *op)
 {
     TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
 
-    u->op = tcg_last_op();
+    u->op = op;
     QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);
 }
 
 void tcg_gen_br(TCGLabel *l)
 {
-    tcg_gen_op1(INDEX_op_br, label_arg(l));
-    add_last_as_label_use(l);
+    add_as_label_use(l, tcg_gen_op1(INDEX_op_br, label_arg(l)));
 }
 
 void tcg_gen_mb(TCGBar mb_type)
@@ -507,8 +513,9 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
-        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
-        add_last_as_label_use(l);
+        TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond_i32,
+                                      arg1, arg2, cond, label_arg(l));
+        add_as_label_use(l, op);
     }
 }
 
@@ -1927,15 +1934,16 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
+        TCGOp *op;
         if (TCG_TARGET_REG_BITS == 32) {
-            tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
-                              TCGV_HIGH(arg1), TCGV_LOW(arg2),
-                              TCGV_HIGH(arg2), cond, label_arg(l));
+            op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
+                                   TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                                   TCGV_HIGH(arg2), cond, label_arg(l));
         } else {
-            tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
-                              label_arg(l));
+            op = tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
+                                   label_arg(l));
         }
-        add_last_as_label_use(l);
+        add_as_label_use(l, op);
     }
 }
 
@@ -1946,12 +1954,12 @@ void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
     } else if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
-        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
-                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                          tcg_constant_i32(arg2),
-                          tcg_constant_i32(arg2 >> 32),
-                          cond, label_arg(l));
-        add_last_as_label_use(l);
+        TCGOp *op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+                                      TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                                      tcg_constant_i32(arg2),
+                                      tcg_constant_i32(arg2 >> 32),
+                                      cond, label_arg(l));
+        add_as_label_use(l, op);
    }
 }