diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-04-19 18:22:15 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-04-19 18:22:16 -0700 |
commit | 1be5a765c08cee3a9587c8a8d3fc2ea247b13f9c (patch) | |
tree | 30ace22866ea218524bf1a64818fc20a93a414e3 /target/i386/ops_sse.h | |
parent | 3202995c13a7484b7d69c43f148354c537bf87de (diff) | |
parent | c9e28ae7972a10fdf09b7ebd8046840d1101b8ce (diff) |
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* Add cpu0-id to query-sev-capabilities
* whpx support for breakpoints and stepping
* initial support for Hyper-V Synthetic Debugging
* use monotonic clock for QemuCond and QemuSemaphore
* Remove qemu-common.h include from most units and lots of other clenaups
* do not include headers for all virtio devices in virtio-ccw.h
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmJXCQAUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroNT6wf+NHDJUEdDiwaVGVTGXgHuiaycsymi
# FpNPiw/+XxSGN5xF3fkUGgqaDrcwIYwVfnXlghKSz8kp1cP3cjxa5CzNMLGTp5je
# N6BxFbD7yC6dhagGm3mj32jlsptv3M38OHqKc3t+RaUAotP5RF2VdCyfUBLG6vU0
# aMzvMfMtB5aG0D8Fr5EV63t1JMTceFU0YxsG73UCFs2Yx4Z0cGBbNxMbHweRhd1q
# tPeVDS46MFPM3/2cGGHpeeqxkoCTU7A9j1VuNQI3k+Kg+6W5YVxiK/UP7bw77E/a
# yAHsmIVTNro8ajMBch73weuHtGtdfFLvCKc6QX6aVjzK4dF1voQ01E7gPQ==
# =rMle
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 13 Apr 2022 10:31:44 AM PDT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (53 commits)
target/i386: Remove unused XMMReg, YMMReg types and CPUState fields
target/i386: do not access beyond the low 128 bits of SSE registers
virtio-ccw: do not include headers for all virtio devices
virtio-ccw: move device type declarations to .c files
virtio-ccw: move vhost_ccw_scsi to a separate file
s390x: follow qdev tree to detect SCSI device on a CCW bus
hw: hyperv: Initial commit for Synthetic Debugging device
hyperv: Add support to process syndbg commands
hyperv: Add definitions for syndbg
hyperv: SControl is optional to enable SynIc
thread-posix: optimize qemu_sem_timedwait with zero timeout
thread-posix: implement Semaphore with QemuCond and QemuMutex
thread-posix: use monotonic clock for QemuCond and QemuSemaphore
thread-posix: remove the posix semaphore support
whpx: Added support for breakpoints and stepping
build-sys: simplify AF_VSOCK check
build-sys: drop ntddscsi.h check
Remove qemu-common.h include from most units
qga: remove explicit environ argument from exec/spawn
Move fcntl_setfl() to oslib-posix
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/i386/ops_sse.h')
-rw-r--r-- | target/i386/ops_sse.h | 75 |
1 files changed, 47 insertions, 28 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 6f1fc174b3..e4d74b814a 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -22,6 +22,7 @@ #if SHIFT == 0 #define Reg MMXReg +#define SIZE 8 #define XMM_ONLY(...) #define B(n) MMX_B(n) #define W(n) MMX_W(n) @@ -30,6 +31,7 @@ #define SUFFIX _mmx #else #define Reg ZMMReg +#define SIZE 16 #define XMM_ONLY(...) __VA_ARGS__ #define B(n) ZMM_B(n) #define W(n) ZMM_W(n) @@ -38,6 +40,22 @@ #define SUFFIX _xmm #endif +/* + * Copy the relevant parts of a Reg value around. In the case where + * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of + * a 64 byte ZMMReg, so we must copy only those and keep the top bytes + * untouched in the guest-visible destination destination register. + * Note that the "lower bytes" are placed last in memory on big-endian + * hosts, which store the vector backwards in memory. In that case the + * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of + * the little-endian case. + */ +#if HOST_BIG_ENDIAN +#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE) +#else +#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE) +#endif + void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { int shift; @@ -516,7 +534,7 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order) r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); - *d = r; + MOVE(*d, r); } #else void helper_shufps(Reg *d, Reg *s, int order) @@ -527,7 +545,7 @@ void helper_shufps(Reg *d, Reg *s, int order) r.L(1) = d->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); - *d = r; + MOVE(*d, r); } void helper_shufpd(Reg *d, Reg *s, int order) @@ -536,7 +554,7 @@ void helper_shufpd(Reg *d, Reg *s, int order) r.Q(0) = d->Q(order & 1); r.Q(1) = s->Q((order >> 1) & 1); - *d = r; + MOVE(*d, r); } void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order) @@ -547,7 +565,7 @@ void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order) r.L(1) = s->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); - *d = r; + MOVE(*d, r); } void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order) @@ -559,7 +577,7 @@ void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order) r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); r.Q(1) = s->Q(1); - *d = r; + MOVE(*d, r); } void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) @@ -571,7 +589,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) r.W(5) = s->W(4 + ((order >> 2) & 3)); r.W(6) = s->W(4 + ((order >> 4) & 3)); r.W(7) = s->W(4 + ((order >> 6) & 3)); - *d = r; + MOVE(*d, r); } #endif @@ -937,7 +955,7 @@ void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -946,7 +964,7 @@ void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -957,7 +975,7 @@ void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -966,7 +984,7 @@ void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -1153,7 +1171,7 @@ void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = satsb((int16_t)s->W(6)); r.B(15) = satsb((int16_t)s->W(7)); #endif - *d = r; + MOVE(*d, r); } void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1180,7 +1198,7 @@ void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = satub((int16_t)s->W(6)); r.B(15) = satub((int16_t)s->W(7)); #endif - *d = r; + MOVE(*d, r); } void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1199,7 +1217,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(6) = satsw(s->L(2)); r.W(7) = satsw(s->L(3)); #endif - *d = r; + MOVE(*d, r); } #define UNPCK_OP(base_name, base) \ @@ -1227,7 +1245,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\ @@ -1245,7 +1263,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\ @@ -1259,7 +1277,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.L(2) = d->L((base << SHIFT) + 1); \ r.L(3) = s->L((base << SHIFT) + 1); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ XMM_ONLY( \ @@ -1272,7 +1290,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ r.Q(0) = d->Q(base); \ r.Q(1) = s->Q(base); \ - *d = r; \ + MOVE(*d, r); \ } \ ) @@ -1313,7 +1331,7 @@ void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1378,7 +1396,7 @@ void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1387,7 +1405,7 @@ void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1424,7 +1442,7 @@ void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_L(0) = s->MMX_L(1); r.MMX_L(1) = s->MMX_L(0); - *d = r; + MOVE(*d, r); } #endif @@ -1438,7 +1456,7 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); } - *d = r; + MOVE(*d, r); } void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1455,7 +1473,7 @@ void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); - *d = r; + MOVE(*d, r); } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1467,7 +1485,7 @@ void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); - *d = r; + MOVE(*d, r); } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1483,7 +1501,7 @@ void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); - *d = r; + MOVE(*d, r); } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1585,7 +1603,7 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, #undef SHR } - *d = r; + MOVE(*d, r); } #define XMM0 (env->xmm_regs[0]) @@ -1718,7 +1736,7 @@ void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(5) = satuw((int32_t) s->L(1)); r.W(6) = satuw((int32_t) s->L(2)); r.W(7) = satuw((int32_t) s->L(3)); - *d = r; + MOVE(*d, r); } #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s) @@ -1984,7 +2002,7 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); } - *d = r; + MOVE(*d, r); } /* SSE4.2 op helpers */ @@ -2324,3 +2342,4 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, #undef L #undef Q #undef SUFFIX +#undef SIZE |