about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2024-05-07 07:34:58 -0700
committer Richard Henderson <richard.henderson@linaro.org> 2024-05-07 07:34:58 -0700
commit 571882c668a0861bf4fcc0411347eab2379200e5 (patch)
tree 79822490d6b2b7df559746853edfb719bf690245
parent e116b92d01c2cd75957a9f8ad1d4932292867b81 (diff)
parent f578b66e8c70ddea71d44db6e2c7abbcd757d684 (diff)
Merge tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu into staging
tcg: Add write_aofs to GVecGen3i
tcg/i386: Simplify immediate 8-bit logical vector shifts
tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
tcg/optimize: Optimize setcond with zmask
accel/tcg: Introduce CF_BP_PAGE
target/sh4: Update DisasContextBase.insn_start
gitlab: Drop --static from s390x linux-user build
gitlab: Streamline ubuntu-22.04-s390x

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY6OoAdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8FEwf7Bhs9bV2Kp4LxUzGq
# +dSHHc/WuCyIILLDQ4kZyXvILuI59wYhrWBUUTzBnAZ/tEf0oMG2y57F/lIcxz9w
# VvsFicMOhtjQ8iBEfl/rkkaYs9BLcxqMTAA3PxNBE6l3bzjcHSTkhey4MoPGRibn
# CkwaLzb2ebNjfgzC1IsNf/tyiMXl0tBQM7JVV4EztaOGEmqw8X0/PyVZDiC3WUNC
# tf9yqiNIlgGkn7rj3sT/rNdi4xlzQybgrb1MCFT6z5cqsW2bwqivRpxHi4yulHKI
# VhYA3kud+TX2ASukpibsSkA+9SbcH/qwOugPhPIu+KANsFUcVKL6Anzv6Ysl9kZ0
# +Wnbow==
# =FJCW
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 May 2024 07:28:16 AM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu:
  gitlab: Streamline ubuntu-22.04-s390x
  gitlab: Drop --static from s390x linux-user build
  gitlab: Drop --disable-libssh from ubuntu-22.04-s390x.yml
  target/sh4: Update DisasContextBase.insn_start
  accel/tcg: Introduce CF_BP_PAGE
  tcg/optimize: Optimize setcond with zmask
  tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
  tcg/i386: Simplify immediate 8-bit logical vector shifts
  tcg: Add write_aofs to GVecGen3i

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- .gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml | 16
-rw-r--r-- accel/tcg/cpu-exec.c | 2
-rw-r--r-- include/exec/translation-block.h | 1
-rw-r--r-- include/tcg/tcg-op-gvec-common.h | 2
-rw-r--r-- target/sh4/translate.c | 1
-rw-r--r-- tcg/i386/tcg-target.c.inc | 76
-rw-r--r-- tcg/optimize.c | 110
-rw-r--r-- tcg/tcg-op-gvec.c | 30
8 files changed, 173 insertions, 65 deletions
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
index 105981879f..25935048e2 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
@@ -2,7 +2,7 @@
# setup by the scripts/ci/setup/build-environment.yml task
# "Install basic packages to build QEMU on Ubuntu 22.04"
-ubuntu-22.04-s390x-all-linux-static:
+ubuntu-22.04-s390x-all-linux:
extends: .custom_runner_template
needs: []
stage: build
@@ -15,13 +15,13 @@ ubuntu-22.04-s390x-all-linux-static:
script:
- mkdir build
- cd build
- - ../configure --enable-debug --static --disable-system
+ - ../configure --enable-debug --disable-system --disable-tools --disable-docs
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync check-tcg
- make --output-sync -j`nproc` check
-ubuntu-22.04-s390x-all:
+ubuntu-22.04-s390x-all-system:
extends: .custom_runner_template
needs: []
stage: build
@@ -35,7 +35,7 @@ ubuntu-22.04-s390x-all:
script:
- mkdir build
- cd build
- - ../configure --disable-libssh
+ - ../configure --disable-user
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
@@ -57,7 +57,7 @@ ubuntu-22.04-s390x-alldbg:
script:
- mkdir build
- cd build
- - ../configure --enable-debug --disable-libssh
+ - ../configure --enable-debug
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make clean
- make --output-sync -j`nproc`
@@ -80,7 +80,7 @@ ubuntu-22.04-s390x-clang:
script:
- mkdir build
- cd build
- - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
+ - ../configure --cc=clang --cxx=clang++ --enable-sanitizers
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
@@ -101,7 +101,7 @@ ubuntu-22.04-s390x-tci:
script:
- mkdir build
- cd build
- - ../configure --disable-libssh --enable-tcg-interpreter
+ - ../configure --enable-tcg-interpreter
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
@@ -122,7 +122,7 @@ ubuntu-22.04-s390x-notcg:
script:
- mkdir build
- cd build
- - ../configure --disable-libssh --disable-tcg
+ - ../configure --disable-tcg
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 9af66bc191..2972f75b96 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -381,7 +381,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
* breakpoints are removed.
*/
if (match_page) {
- *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
+ *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
}
return false;
}
diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h
index 48211c890a..a6d1af6e9b 100644
--- a/include/exec/translation-block.h
+++ b/include/exec/translation-block.h
@@ -77,6 +77,7 @@ struct TranslationBlock {
#define CF_PARALLEL 0x00008000 /* Generate code for a parallel context */
#define CF_NOIRQ 0x00010000 /* Generate an uninterruptible TB */
#define CF_PCREL 0x00020000 /* Opcodes in TB are PC-relative */
+#define CF_BP_PAGE 0x00040000 /* Breakpoint present in code page */
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24
diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
index 4db8a58c14..65553f5f97 100644
--- a/include/tcg/tcg-op-gvec-common.h
+++ b/include/tcg/tcg-op-gvec-common.h
@@ -183,6 +183,8 @@ typedef struct {
bool prefer_i64;
/* Load dest as a 3rd source operand. */
bool load_dest;
+ /* Write aofs as a 2nd dest operand. */
+ bool write_aofs;
} GVecGen3i;
typedef struct {
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index e599ab9d1a..b3282f3ac7 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -2189,6 +2189,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
*/
for (i = 1; i < max_insns; ++i) {
tcg_gen_insn_start(pc + i * 2, ctx->envflags);
+ ctx->base.insn_start = tcg_last_op();
}
}
#endif
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index c6ba498623..59235b4f38 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1658,6 +1658,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
TCGArg dest, TCGArg arg1, TCGArg arg2,
int const_arg2, bool neg)
{
+ int cmp_rexw = rexw;
bool inv = false;
bool cleared;
int jcc;
@@ -1674,6 +1675,18 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
}
break;
+ case TCG_COND_TSTNE:
+ inv = true;
+ /* fall through */
+ case TCG_COND_TSTEQ:
+ /* If arg2 is -1, convert to LTU/GEU vs 1. */
+ if (const_arg2 && arg2 == 0xffffffffu) {
+ arg2 = 1;
+ cmp_rexw = 0;
+ goto do_ltu;
+ }
+ break;
+
case TCG_COND_LEU:
inv = true;
/* fall through */
@@ -1697,7 +1710,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
* We can then use NEG or INC to produce the desired result.
* This is always smaller than the SETCC expansion.
*/
- tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw);
+ tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw);
/* X - X - C = -C = (C ? -1 : 0) */
tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
@@ -1744,7 +1757,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
cleared = true;
}
- jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
+ jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw);
tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
if (!cleared) {
@@ -3769,49 +3782,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
+static void expand_vec_shi(TCGType type, unsigned vece, bool right,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
- TCGv_vec t1, t2;
+ uint8_t mask;
tcg_debug_assert(vece == MO_8);
-
- t1 = tcg_temp_new_vec(type);
- t2 = tcg_temp_new_vec(type);
-
- /*
- * Unpack to W, shift, and repack. Tricky bits:
- * (1) Use punpck*bw x,x to produce DDCCBBAA,
- * i.e. duplicate in other half of the 16-bit lane.
- * (2) For right-shift, add 8 so that the high half of the lane
- * becomes zero. For left-shift, and left-rotate, we must
- * shift up and down again.
- * (3) Step 2 leaves high half zero such that PACKUSWB
- * (pack with unsigned saturation) does not modify
- * the quantity.
- */
- vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
- tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
- vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
- tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
-
- if (opc != INDEX_op_rotli_vec) {
- imm += 8;
- }
- if (opc == INDEX_op_shri_vec) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm);
- tcg_gen_shri_vec(MO_16, t2, t2, imm);
+ if (right) {
+ mask = 0xff >> imm;
+ tcg_gen_shri_vec(MO_16, v0, v1, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm);
- tcg_gen_shli_vec(MO_16, t2, t2, imm);
- tcg_gen_shri_vec(MO_16, t1, t1, 8);
- tcg_gen_shri_vec(MO_16, t2, t2, 8);
+ mask = 0xff << imm;
+ tcg_gen_shli_vec(MO_16, v0, v1, imm);
}
-
- vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
- tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
- tcg_temp_free_vec(t1);
- tcg_temp_free_vec(t2);
+ tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
}
static void expand_vec_sari(TCGType type, unsigned vece,
@@ -3821,7 +3805,7 @@ static void expand_vec_sari(TCGType type, unsigned vece,
switch (vece) {
case MO_8:
- /* Unpack to W, shift, and repack, as in expand_vec_shi. */
+ /* Unpack to 16-bit, shift, and repack. */
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
@@ -3874,12 +3858,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
{
TCGv_vec t;
- if (vece == MO_8) {
- expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
- return;
- }
-
- if (have_avx512vbmi2) {
+ if (vece != MO_8 && have_avx512vbmi2) {
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
return;
@@ -4155,10 +4134,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
+ expand_vec_shi(type, vece, false, v0, v1, a2);
+ break;
case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc, v0, v1, a2);
+ expand_vec_shi(type, vece, true, v0, v1, a2);
break;
-
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2e9e5725a9..8886f7037a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2099,6 +2099,108 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
return false;
}
+static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
+{
+ uint64_t a_zmask, b_val;
+ TCGCond cond;
+
+ if (!arg_is_const(op->args[2])) {
+ return false;
+ }
+
+ a_zmask = arg_info(op->args[1])->z_mask;
+ b_val = arg_info(op->args[2])->val;
+ cond = op->args[3];
+
+ if (ctx->type == TCG_TYPE_I32) {
+ a_zmask = (uint32_t)a_zmask;
+ b_val = (uint32_t)b_val;
+ }
+
+ /*
+ * A with only low bits set vs B with high bits set means that A < B.
+ */
+ if (a_zmask < b_val) {
+ bool inv = false;
+
+ switch (cond) {
+ case TCG_COND_NE:
+ case TCG_COND_LEU:
+ case TCG_COND_LTU:
+ inv = true;
+ /* fall through */
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+ case TCG_COND_EQ:
+ return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
+ default:
+ break;
+ }
+ }
+
+ /*
+ * A with only lsb set is already boolean.
+ */
+ if (a_zmask <= 1) {
+ bool convert = false;
+ bool inv = false;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ inv = true;
+ /* fall through */
+ case TCG_COND_NE:
+ convert = (b_val == 0);
+ break;
+ case TCG_COND_LTU:
+ case TCG_COND_TSTEQ:
+ inv = true;
+ /* fall through */
+ case TCG_COND_GEU:
+ case TCG_COND_TSTNE:
+ convert = (b_val == 1);
+ break;
+ default:
+ break;
+ }
+ if (convert) {
+ TCGOpcode add_opc, xor_opc, neg_opc;
+
+ if (!inv && !neg) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ add_opc = INDEX_op_add_i32;
+ neg_opc = INDEX_op_neg_i32;
+ xor_opc = INDEX_op_xor_i32;
+ break;
+ case TCG_TYPE_I64:
+ add_opc = INDEX_op_add_i64;
+ neg_opc = INDEX_op_neg_i64;
+ xor_opc = INDEX_op_xor_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (!inv) {
+ op->opc = neg_opc;
+ } else if (neg) {
+ op->opc = add_opc;
+ op->args[2] = arg_new_constant(ctx, -1);
+ } else {
+ op->opc = xor_opc;
+ op->args[2] = arg_new_constant(ctx, 1);
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
@@ -2200,6 +2302,10 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
+
+ if (fold_setcond_zmask(ctx, op, false)) {
+ return true;
+ }
fold_setcond_tst_pow2(ctx, op, false);
ctx->z_mask = 1;
@@ -2214,6 +2320,10 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
}
+
+ if (fold_setcond_zmask(ctx, op, true)) {
+ return true;
+ }
fold_setcond_tst_pow2(ctx, op, true);
/* Value is {0,-1} so all bits are repetitions of the sign. */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index bb88943f79..0308732d9b 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -785,7 +785,8 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs,
}
static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
- uint32_t oprsz, int32_t c, bool load_dest,
+ uint32_t oprsz, int32_t c,
+ bool load_dest, bool write_aofs,
void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t))
{
TCGv_i32 t0 = tcg_temp_new_i32();
@@ -801,6 +802,9 @@ static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
}
fni(t2, t0, t1, c);
tcg_gen_st_i32(t2, tcg_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_i32(t0, tcg_env, aofs + i);
+ }
}
tcg_temp_free_i32(t0);
tcg_temp_free_i32(t1);
@@ -944,7 +948,8 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs,
}
static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
- uint32_t oprsz, int64_t c, bool load_dest,
+ uint32_t oprsz, int64_t c,
+ bool load_dest, bool write_aofs,
void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t))
{
TCGv_i64 t0 = tcg_temp_new_i64();
@@ -960,6 +965,9 @@ static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
}
fni(t2, t0, t1, c);
tcg_gen_st_i64(t2, tcg_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_i64(t0, tcg_env, aofs + i);
+ }
}
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
@@ -1102,7 +1110,8 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
*/
static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t tysz,
- TCGType type, int64_t c, bool load_dest,
+ TCGType type, int64_t c,
+ bool load_dest, bool write_aofs,
void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec,
int64_t))
{
@@ -1118,6 +1127,9 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
}
fni(vece, t2, t0, t1, c);
tcg_gen_st_vec(t2, tcg_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_vec(t0, tcg_env, aofs + i);
+ }
}
}
@@ -1471,7 +1483,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
*/
some = QEMU_ALIGN_DOWN(oprsz, 32);
expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
- c, g->load_dest, g->fniv);
+ c, g->load_dest, g->write_aofs, g->fniv);
if (some == oprsz) {
break;
}
@@ -1483,18 +1495,20 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
/* fallthru */
case TCG_TYPE_V128:
expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
- c, g->load_dest, g->fniv);
+ c, g->load_dest, g->write_aofs, g->fniv);
break;
case TCG_TYPE_V64:
expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
- c, g->load_dest, g->fniv);
+ c, g->load_dest, g->write_aofs, g->fniv);
break;
case 0:
if (g->fni8 && check_size_impl(oprsz, 8)) {
- expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8);
+ expand_3i_i64(dofs, aofs, bofs, oprsz, c,
+ g->load_dest, g->write_aofs, g->fni8);
} else if (g->fni4 && check_size_impl(oprsz, 4)) {
- expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4);
+ expand_3i_i32(dofs, aofs, bofs, oprsz, c,
+ g->load_dest, g->write_aofs, g->fni4);
} else {
assert(g->fno != NULL);
tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno);