diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-08-29 18:28:33 +0200 |
---|---|---|
committer | Michael Tokarev <mjt@tls.msk.ru> | 2023-10-04 17:58:16 +0300 |
commit | 755cf29eade59550eaf0d676f8f98d2111465cf3 (patch) | |
tree | 57c45c2fa260764f79c022b16479e97d2f9fd41b | |
parent | 796468c24a8ff2b752710d5577d0bda545ed3a46 (diff) |
target/i386: fix memory operand size for CVTPS2PD
CVTPS2PD only loads a half-register for memory, unlike the other
operations under 0x0F 0x5A. "Unpack" the group into separate
emission functions instead of using gen_unary_fp_sse.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit abd41884c530aa025ada253bf1a5bd0c2b808219)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
-rw-r--r-- | target/i386/tcg/decode-new.c.inc | 14 | ||||
-rw-r--r-- | target/i386/tcg/emit.c.inc | 30 |
2 files changed, 37 insertions, 7 deletions
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index a894e874bc..528e2fdfbb 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -805,10 +805,20 @@ static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entr case 0x51: entry->gen = gen_VSQRT; break; case 0x52: entry->gen = gen_VRSQRT; break; case 0x53: entry->gen = gen_VRCP; break; - case 0x5A: entry->gen = gen_VCVTfp2fp; break; } } +static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F5A[4] = { + X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */ + X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */ + X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */ + X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */ + }; + *entry = *decode_by_prefix(s, opcodes_0F5A); +} + static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { static const X86OpEntry opcodes_0F5B[4] = { @@ -891,7 +901,7 @@ static const X86OpEntry opcodes_0F[256] = { [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* CVTPS2PD */ + [0x5a] = X86_OP_GROUP0(0F5A), [0x5b] = X86_OP_GROUP0(0F5B), [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 5d31fce65d..d6a9de8b3d 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1917,12 +1917,22 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) set_cc_op(s, CC_OP_EFLAGS); } -static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - gen_unary_fp_sse(s, env, decode, - gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm, - gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm, - gen_helper_cvtsd2ss, gen_helper_cvtss2sd); + if (s->vex_l) { + gen_helper_cvtpd2ps_ymm(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtpd2ps_xmm(cpu_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->vex_l) { + gen_helper_cvtps2pd_ymm(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtps2pd_xmm(cpu_env, OP_PTR0, OP_PTR2); + } } static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1939,6 +1949,16 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec } } +static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtsd2ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtss2sd(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); |