target/i386: fix packusdw in-place operation

The SSE4.1 packusdw instruction combines source and destination vectors of signed 32-bit integers into a single vector of unsigned 16-bit integers, with unsigned saturation. When the source and destination are the same register, each 32-bit element of that register is used twice as an input, to produce two of the 16-bit output elements. So if the operation is carried out element-by-element in-place, then no matter what order the elements are processed in, the first element's operation will overwrite some future input.

The helper for packssdw avoids this issue by computing the result in a local temporary and copying it to the destination at the end; this patch fixes the packusdw helper to do likewise.

This fixes three gcc test failures in my GCC 6-based testing.

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Message-Id: <alpine.DEB.2.20.1708100023050.9262@digraph.polyomino.org.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Joseph Myers <joseph@codesourcery.com> 2017-08-10 00:24:23 +0000
committer: Paolo Bonzini <pbonzini@redhat.com> 2017-09-19 14:09:10 +0200
commit: 80e19606215d4df370dfe8fe21c558a129f00f0b (patch)
tree: 5ccef05842c903a802663c1f47f0c9b4ca84ed28
parent: c6a8242915328cda0df0fbc0803da3448137e614 (diff)
1 files changed, 11 insertions, 8 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index d5782167d1..05b170125a 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1655,14 +1655,17 @@ SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
 
 void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-    d->W(0) = satuw((int32_t) d->L(0));
-    d->W(1) = satuw((int32_t) d->L(1));
-    d->W(2) = satuw((int32_t) d->L(2));
-    d->W(3) = satuw((int32_t) d->L(3));
-    d->W(4) = satuw((int32_t) s->L(0));
-    d->W(5) = satuw((int32_t) s->L(1));
-    d->W(6) = satuw((int32_t) s->L(2));
-    d->W(7) = satuw((int32_t) s->L(3));
+    Reg r;
+
+    r.W(0) = satuw((int32_t) d->L(0));
+    r.W(1) = satuw((int32_t) d->L(1));
+    r.W(2) = satuw((int32_t) d->L(2));
+    r.W(3) = satuw((int32_t) d->L(3));
+    r.W(4) = satuw((int32_t) s->L(0));
+    r.W(5) = satuw((int32_t) s->L(1));
+    r.W(6) = satuw((int32_t) s->L(2));
+    r.W(7) = satuw((int32_t) s->L(3));
+    *d = r;
 }
 
 #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s)
author	Joseph Myers <joseph@codesourcery.com>	2017-08-10 00:24:23 +0000
committer	Paolo Bonzini <pbonzini@redhat.com>	2017-09-19 14:09:10 +0200
commit	80e19606215d4df370dfe8fe21c558a129f00f0b (patch)
tree	5ccef05842c903a802663c1f47f0c9b4ca84ed28
parent	c6a8242915328cda0df0fbc0803da3448137e614 (diff)