aboutsummaryrefslogtreecommitdiff
path: root/tcg/tcg-op-gvec.c
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2020-12-15 11:47:59 -0600
committer Richard Henderson <richard.henderson@linaro.org> 2021-01-04 06:32:58 -1000
commit 6d3ef04893bdea3e7aa08be3cce5141902836a31 (patch)
tree eae5ae6605ba168477024c0497e77c7c0c1e74ee /tcg/tcg-op-gvec.c
parent 41192db338588051f21501abc13743e62b0a5605 (diff)
tcg: Use memset for large vector byte replication
In f47db80cc07, we handled odd-sized tail clearing for the case of hosts that have vector operations, but did not handle the case of hosts that do not have vector ops.

This was ok until e2e7168a214b, which changed the encoding of simd_desc such that the odd sizes are impossible.

Add memset as a tcg helper, and use that for all out-of-line byte stores to vectors. This includes, but is not limited to, the tail clearing operation in question.

Cc: qemu-stable@nongnu.org
Buglink: https://bugs.launchpad.net/bugs/1907817
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg/tcg-op-gvec.c')
-rw-r--r-- tcg/tcg-op-gvec.c 32
1 files changed, 32 insertions, 0 deletions
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index ddbe06b71a..1a41dfa908 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -547,6 +547,9 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
in_c = dup_const(vece, in_c);
if (in_c == 0) {
oprsz = maxsz;
+ vece = MO_8;
+ } else if (in_c == dup_const(MO_8, in_c)) {
+ vece = MO_8;
}
}
@@ -628,6 +631,35 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
/* Otherwise implement out of line. */
t_ptr = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
+
+ /*
+ * This may be expand_clr for the tail of an operation, e.g.
+ * oprsz == 8 && maxsz == 64. The size of the clear is misaligned
+ * wrt simd_desc and will assert. Simply pass all replicated byte
+ * stores through to memset.
+ */
+ if (oprsz == maxsz && vece == MO_8) {
+ TCGv_ptr t_size = tcg_const_ptr(oprsz);
+ TCGv_i32 t_val;
+
+ if (in_32) {
+ t_val = in_32;
+ } else if (in_64) {
+ t_val = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t_val, in_64);
+ } else {
+ t_val = tcg_const_i32(in_c);
+ }
+ gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
+
+ if (!in_32) {
+ tcg_temp_free_i32(t_val);
+ }
+ tcg_temp_free_ptr(t_size);
+ tcg_temp_free_ptr(t_ptr);
+ return;
+ }
+
t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
if (vece == MO_64) {