diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2024-05-07 07:34:58 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2024-05-07 07:34:58 -0700 |
commit | 571882c668a0861bf4fcc0411347eab2379200e5 (patch) | |
tree | 79822490d6b2b7df559746853edfb719bf690245 | |
parent | e116b92d01c2cd75957a9f8ad1d4932292867b81 (diff) | |
parent | f578b66e8c70ddea71d44db6e2c7abbcd757d684 (diff) |
Merge tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu into staging
tcg: Add write_aofs to GVecGen3i
tcg/i386: Simplify immediate 8-bit logical vector shifts
tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
tcg/optimize: Optimize setcond with zmask
accel/tcg: Introduce CF_BP_PAGE
target/sh4: Update DisasContextBase.insn_start
gitlab: Drop --static from s390x linux-user build
gitlab: Streamline ubuntu-22.04-s390x
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY6OoAdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8FEwf7Bhs9bV2Kp4LxUzGq
# +dSHHc/WuCyIILLDQ4kZyXvILuI59wYhrWBUUTzBnAZ/tEf0oMG2y57F/lIcxz9w
# VvsFicMOhtjQ8iBEfl/rkkaYs9BLcxqMTAA3PxNBE6l3bzjcHSTkhey4MoPGRibn
# CkwaLzb2ebNjfgzC1IsNf/tyiMXl0tBQM7JVV4EztaOGEmqw8X0/PyVZDiC3WUNC
# tf9yqiNIlgGkn7rj3sT/rNdi4xlzQybgrb1MCFT6z5cqsW2bwqivRpxHi4yulHKI
# VhYA3kud+TX2ASukpibsSkA+9SbcH/qwOugPhPIu+KANsFUcVKL6Anzv6Ysl9kZ0
# +Wnbow==
# =FJCW
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 May 2024 07:28:16 AM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu:
gitlab: Streamline ubuntu-22.04-s390x
gitlab: Drop --static from s390x linux-user build
gitlab: Drop --disable-libssh from ubuntu-22.04-s390x.yml
target/sh4: Update DisasContextBase.insn_start
accel/tcg: Introduce CF_BP_PAGE
tcg/optimize: Optimize setcond with zmask
tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
tcg/i386: Simplify immediate 8-bit logical vector shifts
tcg: Add write_aofs to GVecGen3i
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | .gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml | 16 | ||||
-rw-r--r-- | accel/tcg/cpu-exec.c | 2 | ||||
-rw-r--r-- | include/exec/translation-block.h | 1 | ||||
-rw-r--r-- | include/tcg/tcg-op-gvec-common.h | 2 | ||||
-rw-r--r-- | target/sh4/translate.c | 1 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 76 | ||||
-rw-r--r-- | tcg/optimize.c | 110 | ||||
-rw-r--r-- | tcg/tcg-op-gvec.c | 30 |
8 files changed, 173 insertions, 65 deletions
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml index 105981879f..25935048e2 100644 --- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml +++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml @@ -2,7 +2,7 @@ # setup by the scripts/ci/setup/build-environment.yml task # "Install basic packages to build QEMU on Ubuntu 22.04" -ubuntu-22.04-s390x-all-linux-static: +ubuntu-22.04-s390x-all-linux: extends: .custom_runner_template needs: [] stage: build @@ -15,13 +15,13 @@ ubuntu-22.04-s390x-all-linux-static: script: - mkdir build - cd build - - ../configure --enable-debug --static --disable-system + - ../configure --enable-debug --disable-system --disable-tools --disable-docs || { cat config.log meson-logs/meson-log.txt; exit 1; } - make --output-sync -j`nproc` - make --output-sync check-tcg - make --output-sync -j`nproc` check -ubuntu-22.04-s390x-all: +ubuntu-22.04-s390x-all-system: extends: .custom_runner_template needs: [] stage: build @@ -35,7 +35,7 @@ ubuntu-22.04-s390x-all: script: - mkdir build - cd build - - ../configure --disable-libssh + - ../configure --disable-user || { cat config.log meson-logs/meson-log.txt; exit 1; } - make --output-sync -j`nproc` - make --output-sync -j`nproc` check @@ -57,7 +57,7 @@ ubuntu-22.04-s390x-alldbg: script: - mkdir build - cd build - - ../configure --enable-debug --disable-libssh + - ../configure --enable-debug || { cat config.log meson-logs/meson-log.txt; exit 1; } - make clean - make --output-sync -j`nproc` @@ -80,7 +80,7 @@ ubuntu-22.04-s390x-clang: script: - mkdir build - cd build - - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers + - ../configure --cc=clang --cxx=clang++ --enable-sanitizers || { cat config.log meson-logs/meson-log.txt; exit 1; } - make --output-sync -j`nproc` - make --output-sync -j`nproc` check @@ -101,7 +101,7 @@ ubuntu-22.04-s390x-tci: script: - mkdir build - cd build - - ../configure --disable-libssh --enable-tcg-interpreter + - ../configure --enable-tcg-interpreter || { cat config.log meson-logs/meson-log.txt; exit 1; } - make --output-sync -j`nproc` @@ -122,7 +122,7 @@ ubuntu-22.04-s390x-notcg: script: - mkdir build - cd build - - ../configure --disable-libssh --disable-tcg + - ../configure --disable-tcg || { cat config.log meson-logs/meson-log.txt; exit 1; } - make --output-sync -j`nproc` - make --output-sync -j`nproc` check diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 9af66bc191..2972f75b96 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -381,7 +381,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc, * breakpoints are removed. */ if (match_page) { - *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1; + *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1; } return false; } diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h index 48211c890a..a6d1af6e9b 100644 --- a/include/exec/translation-block.h +++ b/include/exec/translation-block.h @@ -77,6 +77,7 @@ struct TranslationBlock { #define CF_PARALLEL 0x00008000 /* Generate code for a parallel context */ #define CF_NOIRQ 0x00010000 /* Generate an uninterruptible TB */ #define CF_PCREL 0x00020000 /* Opcodes in TB are PC-relative */ +#define CF_BP_PAGE 0x00040000 /* Breakpoint present in code page */ #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h index 4db8a58c14..65553f5f97 100644 --- a/include/tcg/tcg-op-gvec-common.h +++ b/include/tcg/tcg-op-gvec-common.h @@ -183,6 +183,8 @@ typedef struct { bool prefer_i64; /* Load dest as a 3rd source operand. */ bool load_dest; + /* Write aofs as a 2nd dest operand. */ + bool write_aofs; } GVecGen3i; typedef struct { diff --git a/target/sh4/translate.c b/target/sh4/translate.c index e599ab9d1a..b3282f3ac7 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -2189,6 +2189,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) */ for (i = 1; i < max_insns; ++i) { tcg_gen_insn_start(pc + i * 2, ctx->envflags); + ctx->base.insn_start = tcg_last_op(); } } #endif diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index c6ba498623..59235b4f38 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1658,6 +1658,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, TCGArg dest, TCGArg arg1, TCGArg arg2, int const_arg2, bool neg) { + int cmp_rexw = rexw; bool inv = false; bool cleared; int jcc; @@ -1674,6 +1675,18 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, } break; + case TCG_COND_TSTNE: + inv = true; + /* fall through */ + case TCG_COND_TSTEQ: + /* If arg2 is -1, convert to LTU/GEU vs 1. */ + if (const_arg2 && arg2 == 0xffffffffu) { + arg2 = 1; + cmp_rexw = 0; + goto do_ltu; + } + break; + case TCG_COND_LEU: inv = true; /* fall through */ @@ -1697,7 +1710,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, * We can then use NEG or INC to produce the desired result. * This is always smaller than the SETCC expansion. */ - tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw); + tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw); /* X - X - C = -C = (C ? -1 : 0) */ tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest); @@ -1744,7 +1757,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, cleared = true; } - jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw); + jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw); tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest); if (!cleared) { @@ -3769,49 +3782,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) } } -static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc, +static void expand_vec_shi(TCGType type, unsigned vece, bool right, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { - TCGv_vec t1, t2; + uint8_t mask; tcg_debug_assert(vece == MO_8); - - t1 = tcg_temp_new_vec(type); - t2 = tcg_temp_new_vec(type); - - /* - * Unpack to W, shift, and repack. Tricky bits: - * (1) Use punpck*bw x,x to produce DDCCBBAA, - * i.e. duplicate in other half of the 16-bit lane. - * (2) For right-shift, add 8 so that the high half of the lane - * becomes zero. For left-shift, and left-rotate, we must - * shift up and down again. - * (3) Step 2 leaves high half zero such that PACKUSWB - * (pack with unsigned saturation) does not modify - * the quantity. - */ - vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, - tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); - vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, - tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); - - if (opc != INDEX_op_rotli_vec) { - imm += 8; - } - if (opc == INDEX_op_shri_vec) { - tcg_gen_shri_vec(MO_16, t1, t1, imm); - tcg_gen_shri_vec(MO_16, t2, t2, imm); + if (right) { + mask = 0xff >> imm; + tcg_gen_shri_vec(MO_16, v0, v1, imm); } else { - tcg_gen_shli_vec(MO_16, t1, t1, imm); - tcg_gen_shli_vec(MO_16, t2, t2, imm); - tcg_gen_shri_vec(MO_16, t1, t1, 8); - tcg_gen_shri_vec(MO_16, t2, t2, 8); + mask = 0xff << imm; + tcg_gen_shli_vec(MO_16, v0, v1, imm); } - - vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8, - tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2)); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t2); + tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask)); } static void expand_vec_sari(TCGType type, unsigned vece, @@ -3821,7 +3805,7 @@ static void expand_vec_sari(TCGType type, unsigned vece, switch (vece) { case MO_8: - /* Unpack to W, shift, and repack, as in expand_vec_shi. */ + /* Unpack to 16-bit, shift, and repack. */ t1 = tcg_temp_new_vec(type); t2 = tcg_temp_new_vec(type); vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, @@ -3874,12 +3858,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece, { TCGv_vec t; - if (vece == MO_8) { - expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm); - return; - } - - if (have_avx512vbmi2) { + if (vece != MO_8 && have_avx512vbmi2) { vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); return; @@ -4155,10 +4134,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, switch (opc) { case INDEX_op_shli_vec: + expand_vec_shi(type, vece, false, v0, v1, a2); + break; case INDEX_op_shri_vec: - expand_vec_shi(type, vece, opc, v0, v1, a2); + expand_vec_shi(type, vece, true, v0, v1, a2); break; - case INDEX_op_sari_vec: expand_vec_sari(type, vece, v0, v1, a2); break; diff --git a/tcg/optimize.c b/tcg/optimize.c index 2e9e5725a9..8886f7037a 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2099,6 +2099,108 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op) return false; } +static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) +{ + uint64_t a_zmask, b_val; + TCGCond cond; + + if (!arg_is_const(op->args[2])) { + return false; + } + + a_zmask = arg_info(op->args[1])->z_mask; + b_val = arg_info(op->args[2])->val; + cond = op->args[3]; + + if (ctx->type == TCG_TYPE_I32) { + a_zmask = (uint32_t)a_zmask; + b_val = (uint32_t)b_val; + } + + /* + * A with only low bits set vs B with high bits set means that A < B. + */ + if (a_zmask < b_val) { + bool inv = false; + + switch (cond) { + case TCG_COND_NE: + case TCG_COND_LEU: + case TCG_COND_LTU: + inv = true; + /* fall through */ + case TCG_COND_GTU: + case TCG_COND_GEU: + case TCG_COND_EQ: + return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv); + default: + break; + } + } + + /* + * A with only lsb set is already boolean. + */ + if (a_zmask <= 1) { + bool convert = false; + bool inv = false; + + switch (cond) { + case TCG_COND_EQ: + inv = true; + /* fall through */ + case TCG_COND_NE: + convert = (b_val == 0); + break; + case TCG_COND_LTU: + case TCG_COND_TSTEQ: + inv = true; + /* fall through */ + case TCG_COND_GEU: + case TCG_COND_TSTNE: + convert = (b_val == 1); + break; + default: + break; + } + if (convert) { + TCGOpcode add_opc, xor_opc, neg_opc; + + if (!inv && !neg) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); + } + + switch (ctx->type) { + case TCG_TYPE_I32: + add_opc = INDEX_op_add_i32; + neg_opc = INDEX_op_neg_i32; + xor_opc = INDEX_op_xor_i32; + break; + case TCG_TYPE_I64: + add_opc = INDEX_op_add_i64; + neg_opc = INDEX_op_neg_i64; + xor_opc = INDEX_op_xor_i64; + break; + default: + g_assert_not_reached(); + } + + if (!inv) { + op->opc = neg_opc; + } else if (neg) { + op->opc = add_opc; + op->args[2] = arg_new_constant(ctx, -1); + } else { + op->opc = xor_opc; + op->args[2] = arg_new_constant(ctx, 1); + } + return false; + } + } + + return false; +} + static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; @@ -2200,6 +2302,10 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op) if (i >= 0) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } + + if (fold_setcond_zmask(ctx, op, false)) { + return true; + } fold_setcond_tst_pow2(ctx, op, false); ctx->z_mask = 1; @@ -2214,6 +2320,10 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op) if (i >= 0) { return tcg_opt_gen_movi(ctx, op, op->args[0], -i); } + + if (fold_setcond_zmask(ctx, op, true)) { + return true; + } fold_setcond_tst_pow2(ctx, op, true); /* Value is {0,-1} so all bits are repetitions of the sign. */ diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index bb88943f79..0308732d9b 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -785,7 +785,8 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs, } static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, int32_t c, bool load_dest, + uint32_t oprsz, int32_t c, + bool load_dest, bool write_aofs, void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t)) { TCGv_i32 t0 = tcg_temp_new_i32(); @@ -801,6 +802,9 @@ static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, } fni(t2, t0, t1, c); tcg_gen_st_i32(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i32(t0, tcg_env, aofs + i); + } } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -944,7 +948,8 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs, } static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, int64_t c, bool load_dest, + uint32_t oprsz, int64_t c, + bool load_dest, bool write_aofs, void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t)) { TCGv_i64 t0 = tcg_temp_new_i64(); @@ -960,6 +965,9 @@ static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, } fni(t2, t0, t1, c); tcg_gen_st_i64(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i64(t0, tcg_env, aofs + i); + } } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -1102,7 +1110,8 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, */ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, - TCGType type, int64_t c, bool load_dest, + TCGType type, int64_t c, + bool load_dest, bool write_aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { @@ -1118,6 +1127,9 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, } fni(vece, t2, t0, t1, c); tcg_gen_st_vec(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_vec(t0, tcg_env, aofs + i); + } } } @@ -1471,7 +1483,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, */ some = QEMU_ALIGN_DOWN(oprsz, 32); expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); if (some == oprsz) { break; } @@ -1483,18 +1495,20 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, /* fallthru */ case TCG_TYPE_V128: expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); break; case TCG_TYPE_V64: expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8); + expand_3i_i64(dofs, aofs, bofs, oprsz, c, + g->load_dest, g->write_aofs, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4); + expand_3i_i32(dofs, aofs, bofs, oprsz, c, + g->load_dest, g->write_aofs, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno); |