author    Peter Maydell <peter.maydell@linaro.org>    2024-09-27 10:32:29 +0100
committer Peter Maydell <peter.maydell@linaro.org>    2024-09-27 10:32:29 +0100
commit    4ae7d11b70a840eec7aa27269093b15d04ebc84e (patch)
tree      c3b2612319a7b3e392d38fd0f97ed40cb1f0747d /tcg
parent    e10cd93872c31332b002c933a798ab0bc51705a4 (diff)
parent    c4d80fa63e823dc8dbf094b29e39b6978a3073b6 (diff)
Merge tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu into staging

target/ppc: Fix lxvx/stxvx facility check
linux-user: update syscall_nr.h to Linux v6.10
linux-user: update syscall.tbl to Linux v6.11
tcg: Fix iteration step in 32-bit gvec operation
tcg: Propagate new TCGOp to add_as_label_use
tcg/*: Do not expand cmp_vec, cmpsel_vec early
tcg/optimize: Fold movcond with true and false values identical
tcg/optimize: Optimize cmp_vec and cmpsel_vec
tcg/optimize: Optimize bitsel_vec
tcg/i386: Optimize cmpsel with constant 0 operand 3.
tcg/i386: Implement cmp_vec with avx512 insns
tcg/i386: Implement cmpsel_vec with avx512 insns
tcg/i386: Implement vector TST{EQ,NE} for avx512
tcg/ppc: Implement cmpsel_vec and optimize with constant 0/-1 arguments
tcg/s390x: Implement cmpsel_vec and optimize with constant 0/-1 arguments

# -----BEGIN PGP SIGNATURE-----
#
# iQFQBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmbwBsIdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/bzwf40V8fsRhfc8u/k2Xw
# +bXyfyX7ydgB+82YoO71+Wy15ntmYmbL/6O9kGuJXWX8HRDrAR8Js9zDgveysw5m
# m/EG+XAbVjYhjaoaaz2nfr+5auQoos9NoCji3s3UJln6sCuZ2Enl0DTFluoHZfgr
# /YVAghJ4dwH0hfzO9kmsKmJ9I19HugMMN4dNvEcjQyDDUhgGGjkUaRtGNxwuiwuf
# ArL2XhFauXgloryDmpUKQ0BJJ0t8rdPlaRJ8dHqJUnJwav0TW3QVXbrs8gOkmmbg
# 9plYTC9DlTguQobvapCDVjiGy/6yuSF/AUpcdQbTRtTi3BrzhTydfLlOtI64GtaQ
# OBKd
# =ID/g
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 22 Sep 2024 13:00:02 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu: (31 commits)
  linux-user: update syscall.tbl to Linux v6.11
  linux-user,loongarch: move to syscalltbl file
  linux-user,hexagon: move to syscalltbl file
  linux-user,riscv: move to syscalltbl file
  linux-user,openrisc: move to syscalltbl file
  linux-user,aarch64: move to syscalltbl file
  linux-user: update syscall.tbl to Linux v6.10
  linux-user, mips: update syscall-args-o32.c.inc to Linux v6.10
  linux-user: update syscall_nr.h to Linux v6.10
  target/ppc: Fix lxvx/stxvx facility check
  tcg/s390x: Optimize cmpsel with constant 0/-1 arguments
  tcg/s390x: Implement cmpsel_vec
  tcg/ppc: Optimize cmpsel with constant 0/-1 arguments
  tcg/ppc: Implement cmpsel_vec
  tcg/i386: Implement vector TST{EQ,NE} for avx512
  tcg/i386: Implement cmpsel_vec with avx512 insns
  tcg/i386: Add predicate parameters to tcg_out_evex_opc
  tcg/i386: Implement cmp_vec with avx512 insns
  tcg/i386: Optimize cmpsel with constant 0 operand 3.
  tcg/optimize: Optimize bitsel_vec
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r--  tcg/i386/tcg-target-con-set.h     1
-rw-r--r--  tcg/i386/tcg-target-con-str.h     1
-rw-r--r--  tcg/i386/tcg-target.c.inc       486
-rw-r--r--  tcg/i386/tcg-target.h             4
-rw-r--r--  tcg/i386/tcg-target.opc.h         1
-rw-r--r--  tcg/optimize.c                   99
-rw-r--r--  tcg/ppc/tcg-target-con-set.h      1
-rw-r--r--  tcg/ppc/tcg-target.c.inc        254
-rw-r--r--  tcg/ppc/tcg-target.h              2
-rw-r--r--  tcg/s390x/tcg-target-con-set.h    2
-rw-r--r--  tcg/s390x/tcg-target-con-str.h    1
-rw-r--r--  tcg/s390x/tcg-target.c.inc      195
-rw-r--r--  tcg/s390x/tcg-target.h            2
-rw-r--r--  tcg/tcg-internal.h               14
-rw-r--r--  tcg/tcg-op-gvec.c                 2
-rw-r--r--  tcg/tcg-op-vec.c                  4
-rw-r--r--  tcg/tcg-op.c                     86
17 files changed, 748 insertions(+), 407 deletions(-)
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index e24241cfa2..06e6521001 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -50,6 +50,7 @@ C_N1_I2(r, r, r)
C_N1_I2(r, r, rW)
C_O1_I3(x, 0, x, x)
C_O1_I3(x, x, x, x)
+C_O1_I4(x, x, x, xO, x)
C_O1_I4(r, r, reT, r, 0)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, L)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
index cc22db227b..52142ab121 100644
--- a/tcg/i386/tcg-target-con-str.h
+++ b/tcg/i386/tcg-target-con-str.h
@@ -28,6 +28,7 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */
*/
CONST('e', TCG_CT_CONST_S32)
CONST('I', TCG_CT_CONST_I32)
+CONST('O', TCG_CT_CONST_ZERO)
CONST('T', TCG_CT_CONST_TST)
CONST('W', TCG_CT_CONST_WSZ)
CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 9a54ef7f8d..1bf50f1f62 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -133,6 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
#define TCG_CT_CONST_TST 0x1000
+#define TCG_CT_CONST_ZERO 0x2000
/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
@@ -226,6 +227,9 @@ static bool tcg_target_const_match(int64_t val, int ct,
if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
return 1;
}
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
return 0;
}
@@ -409,6 +413,18 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_UD2 (0x0b | P_EXT)
#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPBLENDMB (0x66 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMW (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPBLENDMD (0x64 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPBLENDMQ (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPB (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUB (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPW (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUW (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPD (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUD (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPQ (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUQ (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
@@ -417,6 +433,10 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPMOVM2B (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2W (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPMOVM2D (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2Q (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
@@ -442,6 +462,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_XCHG_EvGv (0x87)
@@ -658,7 +686,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
}
static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
- int rm, int index)
+ int rm, int index, int aaa, bool z)
{
/* The entire 4-byte evex prefix; with R' and V' set. */
uint32_t p = 0x08041062;
@@ -695,7 +723,9 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
p = deposit32(p, 16, 2, pp);
p = deposit32(p, 19, 4, ~v);
p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+ p = deposit32(p, 24, 3, aaa);
p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+ p = deposit32(p, 31, 1, z);
tcg_out32(s, p);
tcg_out8(s, opc);
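
[Reviewer aside, not part of the patch.] The two new parameters fill the remaining EVEX prefix fields: aaa (payload byte 3, bits 0-2) selects an opmask register k0-k7, and z (byte 3, bit 7) selects zeroing rather than merging for masked-off lanes. A standalone sketch of the same deposits, with a local reimplementation of QEMU's deposit32() so it compiles on its own:

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in for QEMU's deposit32() bitfield helper. */
    static uint32_t deposit32(uint32_t val, int pos, int len, uint32_t field)
    {
        uint32_t mask = ((~0u) >> (32 - len)) << pos;
        return (val & ~mask) | ((field << pos) & mask);
    }

    int main(void)
    {
        /* Little-endian image of the 4-byte prefix, as in the code above:
           byte 0 = 0x62, bytes 1-3 = payload with R'/V' preset. */
        uint32_t p = 0x08041062;
        p = deposit32(p, 24, 3, 1);    /* aaa = 1: mask the op with %k1 */
        p = deposit32(p, 31, 1, 1);    /* z = 1: zero masked-off lanes  */
        printf("evex prefix: %02x %02x %02x %02x\n",
               p & 0xff, (p >> 8) & 0xff, (p >> 16) & 0xff, p >> 24);
        return 0;
    }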
@@ -704,13 +734,32 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
if (opc & P_EVEX) {
- tcg_out_evex_opc(s, opc, r, v, rm, 0);
+ tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false);
} else {
tcg_out_vex_opc(s, opc, r, v, rm, 0);
}
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
+static void tcg_out_vex_modrm_type(TCGContext *s, int opc,
+ int r, int v, int rm, TCGType type)
+{
+ if (type == TCG_TYPE_V256) {
+ opc |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, opc, r, v, rm);
+}
+
+static void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v,
+ int rm, int aaa, bool z, TCGType type)
+{
+ if (type == TCG_TYPE_V256) {
+ opc |= P_VEXL;
+ }
+ tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
We handle either RM or INDEX missing with a negative value. In 64-bit
mode for absolute addresses, ~RM is the size of the immediate operand
@@ -904,8 +953,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg a)
{
if (have_avx2) {
- int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
- tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
+ tcg_out_vex_modrm_type(s, avx2_dup_insn[vece], r, 0, a, type);
} else {
switch (vece) {
case MO_8:
@@ -3021,6 +3069,214 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
#undef OP_32_64
}
+static int const umin_insn[4] = {
+ OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
+};
+
+static int const umax_insn[4] = {
+ OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
+};
+
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ static int const cmpeq_insn[4] = {
+ OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+ };
+ static int const cmpgt_insn[4] = {
+ OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+ };
+
+ enum {
+ NEED_INV = 1,
+ NEED_SWAP = 2,
+ NEED_UMIN = 4,
+ NEED_UMAX = 8,
+ INVALID = 16,
+ };
+ static const uint8_t cond_fixup[16] = {
+ [0 ... 15] = INVALID,
+ [TCG_COND_EQ] = 0,
+ [TCG_COND_GT] = 0,
+ [TCG_COND_NE] = NEED_INV,
+ [TCG_COND_LE] = NEED_INV,
+ [TCG_COND_LT] = NEED_SWAP,
+ [TCG_COND_GE] = NEED_SWAP | NEED_INV,
+ [TCG_COND_LEU] = NEED_UMIN,
+ [TCG_COND_GTU] = NEED_UMIN | NEED_INV,
+ [TCG_COND_GEU] = NEED_UMAX,
+ [TCG_COND_LTU] = NEED_UMAX | NEED_INV,
+ };
+ int fixup = cond_fixup[cond];
+
+ assert(!(fixup & INVALID));
+
+ if (fixup & NEED_INV) {
+ cond = tcg_invert_cond(cond);
+ }
+
+ if (fixup & NEED_SWAP) {
+ TCGReg swap = v1;
+ v1 = v2;
+ v2 = swap;
+ cond = tcg_swap_cond(cond);
+ }
+
+ if (fixup & (NEED_UMIN | NEED_UMAX)) {
+ int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]);
+
+ /* avx2 does not have 64-bit min/max; adjusted during expand. */
+ assert(vece <= MO_32);
+
+ tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type);
+ v2 = TCG_TMP_VEC;
+ cond = TCG_COND_EQ;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type);
+ break;
+ case TCG_COND_GT:
+ tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return fixup & NEED_INV;
+}
+
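[Reviewer aside, not from the patch.] The NEED_UMIN/NEED_UMAX rows exist because SSE/AVX2 only provide signed greater-than and equality; unsigned orderings are recovered through min/max, since x <=u y iff umin(x, y) == x, and the remaining rows follow by inversion. A scalar self-check of the identities encoded in cond_fixup:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (uint32_t x = 0; x < 256; x++) {
            for (uint32_t y = 0; y < 256; y++) {
                uint32_t umin = x < y ? x : y;
                uint32_t umax = x > y ? x : y;
                assert((x <= y) == (umin == x)); /* LEU: UMIN, then EQ */
                assert((x >  y) != (umin == x)); /* GTU: UMIN, EQ, INV */
                assert((x >= y) == (umax == x)); /* GEU: UMAX, then EQ */
                assert((x <  y) != (umax == x)); /* LTU: UMAX, EQ, INV */
            }
        }
        return 0;
    }
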
+static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ static const int cmpm_insn[2][4] = {
+ { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
+ { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
+ };
+ static const int testm_insn[4] = {
+ OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ
+ };
+ static const int testnm_insn[4] = {
+ OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ
+ };
+
+ static const int cond_ext[16] = {
+ [TCG_COND_EQ] = 0,
+ [TCG_COND_NE] = 4,
+ [TCG_COND_LT] = 1,
+ [TCG_COND_LTU] = 1,
+ [TCG_COND_LE] = 2,
+ [TCG_COND_LEU] = 2,
+ [TCG_COND_NEVER] = 3,
+ [TCG_COND_GE] = 5,
+ [TCG_COND_GEU] = 5,
+ [TCG_COND_GT] = 6,
+ [TCG_COND_GTU] = 6,
+ [TCG_COND_ALWAYS] = 7,
+ };
+
+ switch (cond) {
+ case TCG_COND_TSTNE:
+ tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
+ break;
+ case TCG_COND_TSTEQ:
+ tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
+ break;
+ default:
+ tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+ /* k1 */ 1, v1, v2, type);
+ tcg_out8(s, cond_ext[cond]);
+ break;
+ }
+}
+
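[Annotation, mine rather than the patch author's.] For readers without the Intel SDM at hand: cond_ext[] holds the imm8 predicate encodings of VPCMP/VPCMPU — signedness is chosen by the opcode, and the immediate only picks the relation. As I read the SDM:

    /* VPCMP{B,W,D,Q} imm8 predicate values. */
    enum {
        VPCMP_EQ = 0,  VPCMP_LT  = 1,  VPCMP_LE  = 2,  VPCMP_FALSE = 3,
        VPCMP_NE = 4,  VPCMP_NLT = 5,  VPCMP_NLE = 6,  VPCMP_TRUE  = 7
    };

This is also why TCG_COND_NEVER and TCG_COND_ALWAYS map to 3 and 7 in the table above.
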
+static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
+ unsigned vece, TCGReg dest)
+{
+ static const int movm_insn[] = {
+ OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q
+ };
+ tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ /*
+ * With avx512, we have a complete set of comparisons into mask.
+ * Unless there's a single insn expansion for the comparison,
+ * expand via a mask in k1.
+ */
+ if ((vece <= MO_16 ? have_avx512bw : have_avx512dq)
+ && cond != TCG_COND_EQ
+ && cond != TCG_COND_LT
+ && cond != TCG_COND_GT) {
+ tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
+ tcg_out_k1_to_vec(s, type, vece, v0);
+ return;
+ }
+
+ if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
+ tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
+ tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
+ }
+}
+
+static void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg c1, TCGReg c2,
+ TCGReg v3, TCGReg v4, TCGCond cond)
+{
+ static const int vpblendm_insn[] = {
+ OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ
+ };
+ bool z = false;
+
+ /* Swap to place constant in V4 to take advantage of zero-masking. */
+ if (!v3) {
+ z = true;
+ v3 = v4;
+ cond = tcg_invert_cond(cond);
+ }
+
+ tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond);
+ tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3,
+ /* k1 */1, z, type);
+}
+
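[Aside, mine.] The swap above exploits EVEX zero-masking: when operand v3 is the constant 0, there is no register holding zero to blend from, but rewriting cmpsel(c, 0, v4) as the inverted-condition blend of v4 with z = 1 lets the hardware zero the lanes where %k1 is clear, per the scalar identity (c ? 0 : f) == (!c ? f : 0).
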
+static void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v0, TCGReg c1, TCGReg c2,
+ TCGReg v3, TCGReg v4, TCGCond cond)
+{
+ bool inv;
+
+ if (vece <= MO_16 ? have_avx512bw : have_avx512vl) {
+ tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond);
+ return;
+ }
+
+ inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond);
+
+ /*
+ * Since XMM0 is 16, the only way we get 0 into V3
+ * is via the constant zero constraint.
+ */
+ if (!v3) {
+ if (inv) {
+ tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type);
+ } else {
+ tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type);
+ }
+ } else {
+ if (inv) {
+ TCGReg swap = v3;
+ v3 = v4;
+ v4 = swap;
+ }
+ tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type);
+ tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4);
+ }
+}
+
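[Two notes on the fallback path, mine rather than the patch's.] With the compare mask m in TCG_TMP_VEC, the constant-zero cases reduce to (m ? 0 : f) == ~m & f — PANDN, which computes ~src1 & src2 — and, when the mask came out inverted, to m & f via PAND. In the blend case, the final tcg_out8 places the mask register index in imm8[7:4], which is where the VEX encoding of VPBLENDVB expects its fourth (selector) operand.
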
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3050,12 +3306,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static int const shift_imm_insn[4] = {
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
};
- static int const cmpeq_insn[4] = {
- OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
- };
- static int const cmpgt_insn[4] = {
- OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
- };
static int const punpckl_insn[4] = {
OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
};
@@ -3074,12 +3324,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static int const smax_insn[4] = {
OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
};
- static int const umin_insn[4] = {
- OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
- };
- static int const umax_insn[4] = {
- OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
- };
static int const rotlv_insn[4] = {
OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
};
@@ -3231,29 +3475,21 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
goto gen_simd;
gen_simd:
tcg_debug_assert(insn != OPC_UD2);
- if (type == TCG_TYPE_V256) {
- insn |= P_VEXL;
- }
- tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
break;
case INDEX_op_cmp_vec:
- sub = args[3];
- if (sub == TCG_COND_EQ) {
- insn = cmpeq_insn[vece];
- } else if (sub == TCG_COND_GT) {
- insn = cmpgt_insn[vece];
- } else {
- g_assert_not_reached();
- }
- goto gen_simd;
+ tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]);
+ break;
+
+ case INDEX_op_cmpsel_vec:
+ tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2,
+ args[3], args[4], args[5]);
+ break;
case INDEX_op_andc_vec:
insn = OPC_PANDN;
- if (type == TCG_TYPE_V256) {
- insn |= P_VEXL;
- }
- tcg_out_vex_modrm(s, insn, a0, a2, a1);
+ tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type);
break;
case INDEX_op_shli_vec:
@@ -3281,10 +3517,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
goto gen_shift;
gen_shift:
tcg_debug_assert(vece != MO_8);
- if (type == TCG_TYPE_V256) {
- insn |= P_VEXL;
- }
- tcg_out_vex_modrm(s, insn, sub, a0, a1);
+ tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type);
tcg_out8(s, a2);
break;
@@ -3361,22 +3594,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
gen_simd_imm8:
tcg_debug_assert(insn != OPC_UD2);
- if (type == TCG_TYPE_V256) {
- insn |= P_VEXL;
- }
- tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type);
tcg_out8(s, sub);
break;
- case INDEX_op_x86_vpblendvb_vec:
- insn = OPC_VPBLENDVB;
- if (type == TCG_TYPE_V256) {
- insn |= P_VEXL;
- }
- tcg_out_vex_modrm(s, insn, a0, a1, a2);
- tcg_out8(s, args[3] << 4);
- break;
-
case INDEX_op_x86_psrldq_vec:
tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
tcg_out8(s, a2);
@@ -3642,8 +3863,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I3(x, 0, x, x);
case INDEX_op_bitsel_vec:
- case INDEX_op_x86_vpblendvb_vec:
return C_O1_I3(x, x, x, x);
+ case INDEX_op_cmpsel_vec:
+ return C_O1_I4(x, x, x, xO, x);
default:
g_assert_not_reached();
@@ -3979,145 +4201,59 @@ static void expand_vec_mul(TCGType type, unsigned vece,
}
}
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static TCGCond expand_vec_cond(TCGType type, unsigned vece,
+ TCGArg *a1, TCGArg *a2, TCGCond cond)
{
- enum {
- NEED_INV = 1,
- NEED_SWAP = 2,
- NEED_BIAS = 4,
- NEED_UMIN = 8,
- NEED_UMAX = 16,
- };
- TCGv_vec t1, t2, t3;
- uint8_t fixup;
-
- switch (cond) {
- case TCG_COND_EQ:
- case TCG_COND_GT:
- fixup = 0;
- break;
- case TCG_COND_NE:
- case TCG_COND_LE:
- fixup = NEED_INV;
- break;
- case TCG_COND_LT:
- fixup = NEED_SWAP;
- break;
- case TCG_COND_GE:
- fixup = NEED_SWAP | NEED_INV;
- break;
- case TCG_COND_LEU:
- if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
- fixup = NEED_UMIN;
- } else {
- fixup = NEED_BIAS | NEED_INV;
- }
- break;
- case TCG_COND_GTU:
- if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
- fixup = NEED_UMIN | NEED_INV;
- } else {
- fixup = NEED_BIAS;
- }
- break;
- case TCG_COND_GEU:
- if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
- fixup = NEED_UMAX;
- } else {
- fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
- }
- break;
- case TCG_COND_LTU:
- if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
- fixup = NEED_UMAX | NEED_INV;
- } else {
- fixup = NEED_BIAS | NEED_SWAP;
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- if (fixup & NEED_INV) {
- cond = tcg_invert_cond(cond);
- }
- if (fixup & NEED_SWAP) {
- t1 = v1, v1 = v2, v2 = t1;
- cond = tcg_swap_cond(cond);
- }
+ /*
+ * Without AVX512, there are no 64-bit unsigned comparisons.
+ * We must bias the inputs so that they become signed.
+ * All other swapping and inversion are handled during code generation.
+ */
+ if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) {
+ TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1));
+ TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2));
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
- t1 = t2 = NULL;
- if (fixup & (NEED_UMIN | NEED_UMAX)) {
- t1 = tcg_temp_new_vec(type);
- if (fixup & NEED_UMIN) {
- tcg_gen_umin_vec(vece, t1, v1, v2);
- } else {
- tcg_gen_umax_vec(vece, t1, v1, v2);
- }
- v2 = t1;
- cond = TCG_COND_EQ;
- } else if (fixup & NEED_BIAS) {
- t1 = tcg_temp_new_vec(type);
- t2 = tcg_temp_new_vec(type);
- t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
tcg_gen_sub_vec(vece, t1, v1, t3);
tcg_gen_sub_vec(vece, t2, v2, t3);
- v1 = t1;
- v2 = t2;
+ *a1 = tcgv_vec_arg(t1);
+ *a2 = tcgv_vec_arg(t2);
cond = tcg_signed_cond(cond);
}
-
- tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
- /* Expand directly; do not recurse. */
- vec_gen_4(INDEX_op_cmp_vec, type, vece,
- tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
- if (t1) {
- tcg_temp_free_vec(t1);
- if (t2) {
- tcg_temp_free_vec(t2);
- }
- }
- return fixup & NEED_INV;
+ return cond;
}
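
[Standalone check of the bias trick used above — standard arithmetic, not specific to this patch.] Subtracting 2^63 converts unsigned order into signed order, so after the two tcg_gen_sub_vec calls a signed comparison gives the unsigned answer:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t bias = 1ull << 63;  /* 1ull << ((8 << MO_64) - 1) */
        const uint64_t xs[] = { 0, 1, bias - 1, bias, bias + 1, UINT64_MAX };
        for (int i = 0; i < 6; i++) {
            for (int j = 0; j < 6; j++) {
                uint64_t x = xs[i], y = xs[j];
                assert((x < y) ==
                       ((int64_t)(x - bias) < (int64_t)(y - bias)));
            }
        }
        return 0;
    }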
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0,
+ TCGArg a1, TCGArg a2, TCGCond cond)
{
- if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
- tcg_gen_not_vec(vece, v0, v0);
- }
+ cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+ /* Expand directly; do not recurse. */
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
}
-static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec c1, TCGv_vec c2,
- TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0,
+ TCGArg a1, TCGArg a2,
+ TCGArg a3, TCGArg a4, TCGCond cond)
{
- TCGv_vec t = tcg_temp_new_vec(type);
-
- if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
- /* Invert the sense of the compare by swapping arguments. */
- TCGv_vec x;
- x = v3, v3 = v4, v4 = x;
- }
- vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
- tcgv_vec_arg(v0), tcgv_vec_arg(v4),
- tcgv_vec_arg(v3), tcgv_vec_arg(t));
- tcg_temp_free_vec(t);
+ cond = expand_vec_cond(type, vece, &a1, &a2, cond);
+ /* Expand directly; do not recurse. */
+ vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond);
}
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGArg a2;
- TCGv_vec v0, v1, v2, v3, v4;
+ TCGArg a1, a2, a3, a4, a5;
+ TCGv_vec v0, v1, v2;
va_start(va, a0);
- v0 = temp_tcgv_vec(arg_temp(a0));
- v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a1 = va_arg(va, TCGArg);
a2 = va_arg(va, TCGArg);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(a1));
switch (opc) {
case INDEX_op_shli_vec:
@@ -4153,15 +4289,15 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
break;
case INDEX_op_cmp_vec:
- v2 = temp_tcgv_vec(arg_temp(a2));
- expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ a3 = va_arg(va, TCGArg);
+ expand_vec_cmp(type, vece, a0, a1, a2, a3);
break;
case INDEX_op_cmpsel_vec:
- v2 = temp_tcgv_vec(arg_temp(a2));
- v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+ a3 = va_arg(va, TCGArg);
+ a4 = va_arg(va, TCGArg);
+ a5 = va_arg(va, TCGArg);
+ expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5);
break;
default:
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 2f67a97e05..c68ac023d8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -223,8 +223,8 @@ typedef enum {
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
-#define TCG_TARGET_HAS_cmpsel_vec -1
-#define TCG_TARGET_HAS_tst_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec have_avx512bw
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
(((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
index b5f403e35e..4ffc084bda 100644
--- a/tcg/i386/tcg-target.opc.h
+++ b/tcg/i386/tcg-target.opc.h
@@ -25,7 +25,6 @@
*/
DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
-DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index ba16ec27e2..e9ef16b3c6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1851,6 +1851,11 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
int i;
+ /* If true and false values are the same, eliminate the cmp. */
+ if (args_are_copies(op->args[3], op->args[4])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+ }
+
/*
* Canonicalize the "false" input reg to match the destination reg so
* that the tcg backend can implement a "move if true" operation.
@@ -2417,6 +2422,36 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
+{
+ /* Canonicalize the comparison to put immediate second. */
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[3] = tcg_swap_cond(op->args[3]);
+ }
+ return false;
+}
+
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
+{
+ /* If true and false values are the same, eliminate the cmp. */
+ if (args_are_copies(op->args[3], op->args[4])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+ }
+
+ /* Canonicalize the comparison to put immediate second. */
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[5] = tcg_swap_cond(op->args[5]);
+ }
+ /*
+ * Canonicalize the "false" input reg to match the destination,
+ * so that the tcg backend can implement "move if true".
+ */
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+ op->args[5] = tcg_invert_cond(op->args[5]);
+ }
+ return false;
+}
+
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
uint64_t z_mask, s_mask, s_mask_old;
@@ -2702,6 +2737,61 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
return fold_masks(ctx, op);
}
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
+{
+ /* If true and false values are the same, eliminate the cmp. */
+ if (args_are_copies(op->args[2], op->args[3])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+ }
+
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+ uint64_t tv = arg_info(op->args[2])->val;
+ uint64_t fv = arg_info(op->args[3])->val;
+
+ if (tv == -1 && fv == 0) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+ if (tv == 0 && fv == -1) {
+ if (TCG_TARGET_HAS_not_vec) {
+ op->opc = INDEX_op_not_vec;
+ return fold_not(ctx, op);
+ } else {
+ op->opc = INDEX_op_xor_vec;
+ op->args[2] = arg_new_constant(ctx, -1);
+ return fold_xor(ctx, op);
+ }
+ }
+ }
+ if (arg_is_const(op->args[2])) {
+ uint64_t tv = arg_info(op->args[2])->val;
+ if (tv == -1) {
+ op->opc = INDEX_op_or_vec;
+ op->args[2] = op->args[3];
+ return fold_or(ctx, op);
+ }
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
+ op->opc = INDEX_op_andc_vec;
+ op->args[2] = op->args[1];
+ op->args[1] = op->args[3];
+ return fold_andc(ctx, op);
+ }
+ }
+ if (arg_is_const(op->args[3])) {
+ uint64_t fv = arg_info(op->args[3])->val;
+ if (fv == 0) {
+ op->opc = INDEX_op_and_vec;
+ return fold_and(ctx, op);
+ }
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
+ op->opc = INDEX_op_orc_vec;
+ op->args[2] = op->args[1];
+ op->args[1] = op->args[3];
+ return fold_orc(ctx, op);
+ }
+ }
+ return false;
+}
+
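[Aside, mine.] The folds above are the usual mux identities. A brute-force scalar check over 3-bit lanes, where bitsel(c, t, f) = (c & t) | (~c & f) per bit and M plays the role of the all-ones constant -1:

    #include <assert.h>

    int main(void)
    {
        const unsigned M = 7;   /* 3-bit "vector lane" */
        for (unsigned c = 0; c <= M; c++) {
            for (unsigned t = 0; t <= M; t++) {
                for (unsigned f = 0; f <= M; f++) {
                    unsigned sel = (c & t) | (~c & f & M);
                    if (t == M && f == 0) assert(sel == c);              /* mov  */
                    if (t == 0 && f == M) assert(sel == (~c & M));       /* not  */
                    if (t == M)           assert(sel == (c | f));        /* or   */
                    if (t == 0)           assert(sel == (f & ~c & M));   /* andc */
                    if (f == 0)           assert(sel == (c & t));        /* and  */
                    if (f == M)           assert(sel == ((t | ~c) & M)); /* orc  */
                }
            }
        }
        return 0;
    }
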
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
@@ -2923,6 +3013,15 @@ void tcg_optimize(TCGContext *s)
case INDEX_op_setcond2_i32:
done = fold_setcond2(&ctx, op);
break;
+ case INDEX_op_cmp_vec:
+ done = fold_cmp_vec(&ctx, op);
+ break;
+ case INDEX_op_cmpsel_vec:
+ done = fold_cmpsel_vec(&ctx, op);
+ break;
+ case INDEX_op_bitsel_vec:
+ done = fold_bitsel_vec(&ctx, op);
+ break;
CASE_OP_32_64(sextract):
done = fold_sextract(&ctx, op);
break;
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index 9f99bde505..453abde6c1 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -33,6 +33,7 @@ C_O1_I2(r, r, rU)
C_O1_I2(r, r, rZW)
C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
+C_O1_I4(v, v, v, vZM, v)
C_O1_I4(r, r, rC, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, r)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 3553a47ba9..3f413ce3c1 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3567,12 +3567,14 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_usadd_vec:
case INDEX_op_ussub_vec:
return vece <= MO_32;
- case INDEX_op_cmp_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
case INDEX_op_rotli_vec:
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+ case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
case INDEX_op_neg_vec:
return vece >= MO_32 && have_isa_3_00;
case INDEX_op_mul_vec:
@@ -3713,6 +3715,149 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
return true;
}
+static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
+{
+ tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
+}
+
+static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+ tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+ tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+ tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
+{
+ tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
+ TCGReg c, TCGReg t, TCGReg f)
+{
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
+ } else {
+ tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
+ tcg_out_andc_vec(s, d, f, c);
+ tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
+ }
+}
+
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg a1, TCGReg a2, TCGCond cond)
+{
+ static const uint32_t
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+ ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
+ uint32_t insn;
+
+ bool need_swap = false, need_inv = false;
+
+ tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ if (have_isa_3_00 && vece <= MO_32) {
+ break;
+ }
+ /* fall through */
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGReg swap = a1;
+ a1 = a2;
+ a2 = swap;
+ cond = tcg_swap_cond(cond);
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ insn = eq_op[vece];
+ break;
+ case TCG_COND_NE:
+ insn = ne_op[vece];
+ break;
+ case TCG_COND_GT:
+ insn = gts_op[vece];
+ break;
+ case TCG_COND_GTU:
+ insn = gtu_op[vece];
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
+
+ return need_inv;
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg a1, TCGReg a2, TCGCond cond)
+{
+ if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
+ tcg_out_not_vec(s, a0, a0);
+ }
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
+ TCGReg v4, TCGCond cond)
+{
+ bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);
+
+ if (!const_v3) {
+ if (inv) {
+ tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
+ } else {
+ tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
+ }
+ } else if (v3) {
+ if (inv) {
+ tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
+ } else {
+ tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
+ }
+ } else {
+ if (inv) {
+ tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
+ } else {
+ tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
+ }
+ }
+}
+
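[Aside, mine.] With the compare mask in TCG_VEC_TMP1, the constant-operand cases above reduce to one logical op each. A scalar check of all four, treating m as the per-lane all-ones/all-zeros mask the compare produced:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint8_t f = 0x5a;                /* arbitrary "other" operand  */
        for (int hit = 0; hit <= 1; hit++) {
            uint8_t m = hit ? 0xff : 0x00;     /* mask lane from the compare */
            assert((uint8_t)(m ? 0xff : f) == (uint8_t)(f | m));   /* VOR   */
            assert((uint8_t)(m ? f : 0xff) == (uint8_t)(f | ~m));  /* VORC  */
            assert((uint8_t)(m ? 0x00 : f) == (uint8_t)(f & ~m));  /* VANDC */
            assert((uint8_t)(m ? f : 0x00) == (uint8_t)(f & m));   /* VAND  */
        }
        return 0;
    }
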
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -3723,10 +3868,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
mul_op[4] = { 0, 0, VMULUWM, VMULLD },
neg_op[4] = { 0, 0, VNEGW, VNEGD },
- eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
- ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
- gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
@@ -3808,24 +3949,23 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
insn = sarv_op[vece];
break;
case INDEX_op_and_vec:
- insn = VAND;
- break;
+ tcg_out_and_vec(s, a0, a1, a2);
+ return;
case INDEX_op_or_vec:
- insn = VOR;
- break;
+ tcg_out_or_vec(s, a0, a1, a2);
+ return;
case INDEX_op_xor_vec:
insn = VXOR;
break;
case INDEX_op_andc_vec:
- insn = VANDC;
- break;
+ tcg_out_andc_vec(s, a0, a1, a2);
+ return;
case INDEX_op_not_vec:
- insn = VNOR;
- a2 = a1;
- break;
+ tcg_out_not_vec(s, a0, a1);
+ return;
case INDEX_op_orc_vec:
- insn = VORC;
- break;
+ tcg_out_orc_vec(s, a0, a1, a2);
+ return;
case INDEX_op_nand_vec:
insn = VNAND;
break;
@@ -3837,26 +3977,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_cmp_vec:
- switch (args[3]) {
- case TCG_COND_EQ:
- insn = eq_op[vece];
- break;
- case TCG_COND_NE:
- insn = ne_op[vece];
- break;
- case TCG_COND_GT:
- insn = gts_op[vece];
- break;
- case TCG_COND_GTU:
- insn = gtu_op[vece];
- break;
- default:
- g_assert_not_reached();
- }
- break;
-
+ tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
+ return;
+ case INDEX_op_cmpsel_vec:
+ tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
+ args[3], const_args[3], args[4], args[5]);
+ return;
case INDEX_op_bitsel_vec:
- tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+ tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
return;
case INDEX_op_dup2_vec:
@@ -3921,56 +4049,6 @@ static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
- bool need_swap = false, need_inv = false;
-
- tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
-
- switch (cond) {
- case TCG_COND_EQ:
- case TCG_COND_GT:
- case TCG_COND_GTU:
- break;
- case TCG_COND_NE:
- if (have_isa_3_00 && vece <= MO_32) {
- break;
- }
- /* fall through */
- case TCG_COND_LE:
- case TCG_COND_LEU:
- need_inv = true;
- break;
- case TCG_COND_LT:
- case TCG_COND_LTU:
- need_swap = true;
- break;
- case TCG_COND_GE:
- case TCG_COND_GEU:
- need_swap = need_inv = true;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (need_inv) {
- cond = tcg_invert_cond(cond);
- }
- if (need_swap) {
- TCGv_vec t1;
- t1 = v1, v1 = v2, v2 = t1;
- cond = tcg_swap_cond(cond);
- }
-
- vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
- tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
- if (need_inv) {
- tcg_gen_not_vec(vece, v0, v0);
- }
-}
-
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec v2)
{
@@ -4045,10 +4123,6 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
case INDEX_op_rotli_vec:
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
break;
- case INDEX_op_cmp_vec:
- v2 = temp_tcgv_vec(arg_temp(a2));
- expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
- break;
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
@@ -4276,6 +4350,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_bitsel_vec:
case INDEX_op_ppc_msum_vec:
return C_O1_I3(v, v, v, v);
+ case INDEX_op_cmpsel_vec:
+ return C_O1_I4(v, v, v, vZM, v);
default:
g_assert_not_reached();
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index e154fb14df..0b2171d38c 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -172,7 +172,7 @@ typedef enum {
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec have_vsx
-#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
#define TCG_TARGET_HAS_tst_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index f75955eaa8..370e4b1295 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -38,6 +38,8 @@ C_O1_I2(r, rZ, r)
C_O1_I2(v, v, r)
C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
+C_O1_I4(v, v, v, vZ, v)
+C_O1_I4(v, v, v, vZM, v)
C_O1_I4(r, r, ri, rI, r)
C_O1_I4(r, r, rC, rI, r)
C_O2_I1(o, m, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 745f6c0df5..3e574e0662 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -20,6 +20,7 @@ CONST('C', TCG_CT_CONST_CMP)
CONST('I', TCG_CT_CONST_S16)
CONST('J', TCG_CT_CONST_S32)
CONST('K', TCG_CT_CONST_P32)
+CONST('M', TCG_CT_CONST_M1)
CONST('N', TCG_CT_CONST_INV)
CONST('R', TCG_CT_CONST_INVRISBG)
CONST('U', TCG_CT_CONST_U32)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index ad587325fc..a5d57197a4 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -36,6 +36,7 @@
#define TCG_CT_CONST_INV (1 << 13)
#define TCG_CT_CONST_INVRISBG (1 << 14)
#define TCG_CT_CONST_CMP (1 << 15)
+#define TCG_CT_CONST_M1 (1 << 16)
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
@@ -46,6 +47,7 @@
/* A scratch register that may be used throughout the backend. */
#define TCG_TMP0 TCG_REG_R1
+#define TCG_VEC_TMP0 TCG_REG_V31
#define TCG_GUEST_BASE_REG TCG_REG_R13
@@ -606,6 +608,9 @@ static bool tcg_target_const_match(int64_t val, int ct,
if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
return true;
}
+ if ((ct & TCG_CT_CONST_M1) && val == -1) {
+ return true;
+ }
if (ct & TCG_CT_CONST_INV) {
val = ~val;
@@ -2841,6 +2846,94 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
}
+static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg a1, TCGReg a2, TCGCond cond)
+{
+ bool need_swap = false, need_inv = false;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGReg swap = a1;
+ a1 = a2;
+ a2 = swap;
+ cond = tcg_swap_cond(cond);
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GT:
+ tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return need_inv;
+}
+
+static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg a1, TCGReg a2, TCGCond cond)
+{
+ if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
+ tcg_out_insn(s, VRRc, VNO, a0, a0, a0, 0);
+ }
+}
+
+static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
+ TCGReg c1, TCGReg c2, TCGArg v3,
+ int const_v3, TCGReg v4, TCGCond cond)
+{
+ bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP0, c1, c2, cond);
+
+ if (!const_v3) {
+ if (inv) {
+ tcg_out_insn(s, VRRe, VSEL, a0, v4, v3, TCG_VEC_TMP0);
+ } else {
+ tcg_out_insn(s, VRRe, VSEL, a0, v3, v4, TCG_VEC_TMP0);
+ }
+ } else if (v3) {
+ if (inv) {
+ tcg_out_insn(s, VRRc, VOC, a0, v4, TCG_VEC_TMP0, 0);
+ } else {
+ tcg_out_insn(s, VRRc, VO, a0, v4, TCG_VEC_TMP0, 0);
+ }
+ } else {
+ if (inv) {
+ tcg_out_insn(s, VRRc, VN, a0, v4, TCG_VEC_TMP0, 0);
+ } else {
+ tcg_out_insn(s, VRRc, VNC, a0, v4, TCG_VEC_TMP0, 0);
+ }
+ }
+}
+
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -2959,19 +3052,11 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_cmp_vec:
- switch ((TCGCond)args[3]) {
- case TCG_COND_EQ:
- tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
- break;
- case TCG_COND_GT:
- tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
- break;
- case TCG_COND_GTU:
- tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
- break;
- default:
- g_assert_not_reached();
- }
+ tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
+ break;
+ case INDEX_op_cmpsel_vec:
+ tcg_out_cmpsel_vec(s, vece, a0, a1, a2, args[3], const_args[3],
+ args[4], args[5]);
break;
case INDEX_op_s390_vuph_vec:
@@ -3024,9 +3109,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
case INDEX_op_xor_vec:
- return 1;
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
+ return 1;
case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
@@ -3039,71 +3124,6 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
- bool need_swap = false, need_inv = false;
-
- switch (cond) {
- case TCG_COND_EQ:
- case TCG_COND_GT:
- case TCG_COND_GTU:
- break;
- case TCG_COND_NE:
- case TCG_COND_LE:
- case TCG_COND_LEU:
- need_inv = true;
- break;
- case TCG_COND_LT:
- case TCG_COND_LTU:
- need_swap = true;
- break;
- case TCG_COND_GE:
- case TCG_COND_GEU:
- need_swap = need_inv = true;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (need_inv) {
- cond = tcg_invert_cond(cond);
- }
- if (need_swap) {
- TCGv_vec t1;
- t1 = v1, v1 = v2, v2 = t1;
- cond = tcg_swap_cond(cond);
- }
-
- vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
- tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
-
- return need_inv;
-}
-
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec v2, TCGCond cond)
-{
- if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
- tcg_gen_not_vec(vece, v0, v0);
- }
-}
-
-static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec c1, TCGv_vec c2,
- TCGv_vec v3, TCGv_vec v4, TCGCond cond)
-{
- TCGv_vec t = tcg_temp_new_vec(type);
-
- if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
- /* Invert the sense of the compare by swapping arguments. */
- tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
- } else {
- tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
- }
- tcg_temp_free_vec(t);
-}
-
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
{
@@ -3145,7 +3165,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2, v3, v4, t0;
+ TCGv_vec v0, v1, v2, t0;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
@@ -3153,16 +3173,6 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
switch (opc) {
- case INDEX_op_cmp_vec:
- expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
- break;
-
- case INDEX_op_cmpsel_vec:
- v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
- break;
-
case INDEX_op_rotrv_vec:
t0 = tcg_temp_new_vec(type);
tcg_gen_neg_vec(vece, t0, v2);
@@ -3397,6 +3407,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I2(v, v, r);
case INDEX_op_bitsel_vec:
return C_O1_I3(v, v, v, v);
+ case INDEX_op_cmpsel_vec:
+ return (TCG_TARGET_HAS_orc_vec
+ ? C_O1_I4(v, v, v, vZM, v)
+ : C_O1_I4(v, v, v, vZ, v));
default:
g_assert_not_reached();
@@ -3521,6 +3535,7 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
/* XXX many insns can't be used with R0, so we better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 62ce9d792a..86aeca166f 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -162,7 +162,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
#define TCG_TARGET_HAS_tst_vec 0
/* used for function call generation */
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index 9b0d982f65..8099248076 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -92,15 +92,17 @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind);
*/
TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
-void tcg_gen_op1(TCGOpcode, TCGArg);
-void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
-void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op1(TCGOpcode, TCGArg);
+TCGOp *tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
+TCGOp *tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+TCGOp *tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
+void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
+ TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e);
#endif /* TCG_INTERNAL_H */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 0308732d9b..78ee1ced80 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3939,7 +3939,7 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t i;
tcg_gen_extrl_i64_i32(t1, c);
- for (i = 0; i < oprsz; i += 8) {
+ for (i = 0; i < oprsz; i += 4) {
tcg_gen_ld_i32(t0, tcg_env, aofs + i);
tcg_gen_negsetcond_i32(cond, t0, t0, t1);
tcg_gen_st_i32(t0, tcg_env, dofs + i);
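
[Context, my reading of the hunk.] This is the "tcg: Fix iteration step in 32-bit gvec operation" fix from the pull: the loop loads, compares and stores 32-bit elements, so advancing the offset by 8 bytes per iteration touched only every other element; stepping by 4 visits them all.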
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index 84af210bc0..d4bb4aee74 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -172,8 +172,8 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
op->args[3] = c;
}
-static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
- TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
+void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
+ TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
TCGOp *op = tcg_emit_op(opc, 6);
TCGOP_VECL(op) = type - TCG_TYPE_V64;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index eff3728622..4a7e705367 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -37,38 +37,43 @@
*/
#define NI __attribute__((noinline))
-void NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
+TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
{
TCGOp *op = tcg_emit_op(opc, 1);
op->args[0] = a1;
+ return op;
}
-void NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
+TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
{
TCGOp *op = tcg_emit_op(opc, 2);
op->args[0] = a1;
op->args[1] = a2;
+ return op;
}
-void NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
+TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
{
TCGOp *op = tcg_emit_op(opc, 3);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
+ return op;
}
-void NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
+TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
+ TCGArg a3, TCGArg a4)
{
TCGOp *op = tcg_emit_op(opc, 4);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
op->args[3] = a4;
+ return op;
}
-void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
- TCGArg a4, TCGArg a5)
+TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
+ TCGArg a3, TCGArg a4, TCGArg a5)
{
TCGOp *op = tcg_emit_op(opc, 5);
op->args[0] = a1;
@@ -76,10 +81,11 @@ void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
op->args[2] = a3;
op->args[3] = a4;
op->args[4] = a5;
+ return op;
}
-void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
- TCGArg a4, TCGArg a5, TCGArg a6)
+TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+ TCGArg a4, TCGArg a5, TCGArg a6)
{
TCGOp *op = tcg_emit_op(opc, 6);
op->args[0] = a1;
@@ -88,6 +94,7 @@ void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
op->args[3] = a4;
op->args[4] = a5;
op->args[5] = a6;
+ return op;
}
/*
@@ -110,9 +117,9 @@ static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
tcg_gen_op1(opc, tcgv_i64_arg(a1));
}
-static void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
+static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
{
- tcg_gen_op1(opc, a1);
+ return tcg_gen_op1(opc, a1);
}
static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
@@ -189,16 +196,16 @@ static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
tcgv_i64_arg(a3), a4);
}
-static void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
- TCGArg a3, TCGArg a4)
+static TCGOp * DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGArg a3, TCGArg a4)
{
- tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
+ return tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
}
-static void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
- TCGArg a3, TCGArg a4)
+static TCGOp * DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGArg a3, TCGArg a4)
{
- tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
+ return tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
}
static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
@@ -263,12 +270,12 @@ static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
}
-static void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
- TCGv_i32 a3, TCGv_i32 a4,
- TCGArg a5, TCGArg a6)
+static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4,
+ TCGArg a5, TCGArg a6)
{
- tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
- tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
+ return tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+ tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
}
/* Generic ops. */
@@ -279,18 +286,17 @@ void gen_set_label(TCGLabel *l)
tcg_gen_op1(INDEX_op_set_label, label_arg(l));
}
-static void add_last_as_label_use(TCGLabel *l)
+static void add_as_label_use(TCGLabel *l, TCGOp *op)
{
TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
- u->op = tcg_last_op();
+ u->op = op;
QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);
}
void tcg_gen_br(TCGLabel *l)
{
- tcg_gen_op1(INDEX_op_br, label_arg(l));
- add_last_as_label_use(l);
+ add_as_label_use(l, tcg_gen_op1(INDEX_op_br, label_arg(l)));
}
void tcg_gen_mb(TCGBar mb_type)
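
[Context for the API change above, my reading.] Having the tcg_gen_opN helpers return the freshly emitted TCGOp lets callers hand the branch op straight to add_as_label_use, instead of fishing it back out with tcg_last_op() and implicitly assuming the branch was the most recently emitted op — presumably the point of the "tcg: Propagate new TCGOp to add_as_label_use" commit.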
@@ -507,8 +513,9 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
- tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
- add_last_as_label_use(l);
+ TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond_i32,
+ arg1, arg2, cond, label_arg(l));
+ add_as_label_use(l, op);
}
}
@@ -1927,15 +1934,16 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
+ TCGOp *op;
if (TCG_TARGET_REG_BITS == 32) {
- tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
- TCGV_HIGH(arg1), TCGV_LOW(arg2),
- TCGV_HIGH(arg2), cond, label_arg(l));
+ op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
+ TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2), cond, label_arg(l));
} else {
- tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
- label_arg(l));
+ op = tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
+ label_arg(l));
}
- add_last_as_label_use(l);
+ add_as_label_use(l, op);
}
}
@@ -1946,12 +1954,12 @@ void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
} else if (cond == TCG_COND_ALWAYS) {
tcg_gen_br(l);
} else if (cond != TCG_COND_NEVER) {
- tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
- TCGV_LOW(arg1), TCGV_HIGH(arg1),
- tcg_constant_i32(arg2),
- tcg_constant_i32(arg2 >> 32),
- cond, label_arg(l));
- add_last_as_label_use(l);
+ TCGOp *op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2),
+ tcg_constant_i32(arg2 >> 32),
+ cond, label_arg(l));
+ add_as_label_use(l, op);
}
}