author     Peter Maydell <peter.maydell@linaro.org>   2024-01-04 19:55:19 +0000
committer  Peter Maydell <peter.maydell@linaro.org>   2024-01-04 19:55:20 +0000
commit     05470c3979d5485003e129ff4b0c2ef98af91d86 (patch)
tree       62455044099aebd9c5333f4491e7e588101d10ac /target
parent     d328fef93ae757a0dd65ed786a4086e27952eef3 (diff)
parent     f705c1f25d9a075534f8279048082af4ce2066bf (diff)
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* configure: use a native non-cross compiler for linux-user
* meson: cleanups
* target/i386: miscellaneous cleanups and optimizations
* target/i386: implement CMPccXADD
* target/i386: the sgx_epc_get_section stub is reachable
* esp: check for NULL result from scsi_device_find()
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmWRImYUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroNd7AgAgcyJGiMfUkXqhefplpm06RDXQIa8
# FuoJqPb21lO75DQKfaFRAc4xGLagjJROMJGHMm9HvMu2VlwvOydkQlfFRspENxQ/
# 5XzGdb/X0A7HA/mwUfnMB1AZx0Vs32VI5IBSc6acc9fmgeZ84XQEoM3KBQHUik7X
# mSkE4eltR9gJ+4IaGo4voZtK+YoVD8nEcuqmnKihSPWizev0FsZ49aNMtaYa9qC/
# Xs3kiQd/zPibHDHJu0ulFsNZgxtUcvlLHTCf8gO4dHWxCFLXGubMush83McpRtNB
# Qoh6cTLH+PBXfrxMR3zmTZMNvo8Euls3s07Y8TkNP4vdIIE/kMeMDW1wJw==
# =mq30
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 31 Dec 2023 08:12:22 GMT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (46 commits)
meson.build: report graphics backends separately
configure, meson: rename targetos to host_os
meson: rename config_all
meson: remove CONFIG_ALL
meson: remove config_targetos
meson: remove CONFIG_POSIX and CONFIG_WIN32 from config_targetos
meson: remove OS definitions from config_targetos
meson: always probe u2f and canokey if the option is enabled
meson: move subdirs to "Collect sources" section
meson: move config-host.h definitions together
meson: move CFI detection code with other compiler flags
meson: keep subprojects together
meson: move accelerator dependency checks together
meson: move option validation together
meson: move program checks together
meson: add more sections to main meson.build
configure: unify again the case arms in probe_target_compiler
configure: remove unnecessary subshell
Makefile: clean qemu-iotests output
meson: use version_compare() to compare version
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
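
The headline item of this pull request is the CMPccXADD implementation for target/i386 (see the decode-new.c.inc and emit.c.inc hunks below). As a rough illustration only, the operation that gen_CMPccXADD translates behaves as sketched in the standalone C helper below. The helper, its name and its signature are invented for this sketch and are not part of the patch; mem is the modrm memory operand, reg1 the modrm.reg register, reg2 the VEX.vvvv register, and on real hardware the whole step executes atomically.

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative sketch (not QEMU code): one step of CMPccXADD.
 * EFLAGS are set as for "CMP mem, reg1"; the addition and the store to
 * memory happen only if the condition cc (encoded in the opcode, passed
 * here as a callback) holds for that comparison.
 */
static uint64_t cmpccxadd_step(uint64_t *mem, uint64_t *reg1, uint64_t reg2,
                               bool (*cc_holds)(uint64_t lhs, uint64_t rhs))
{
    uint64_t tmp = *mem;            /* load the memory operand             */
    uint64_t cmp = tmp - *reg1;     /* flags come from this subtraction    */

    if (cc_holds(tmp, *reg1)) {     /* condition evaluated on tmp vs. reg1 */
        *mem = tmp + reg2;          /* conditionally add reg2 and store    */
    }
    *reg1 = tmp;                    /* reg1 always receives the old value  */
    return cmp;                     /* result used to recompute EFLAGS     */
}

This matches the decoder comment in the patch ("REG selects srcdest2 operand, VEX.vvvv selects src3") and the CC_OP_SUBB + ot flag update at the end of gen_CMPccXADD.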
Diffstat (limited to 'target')
-rw-r--r--  target/arm/meson.build           |   2
-rw-r--r--  target/i386/cpu.c                |   2
-rw-r--r--  target/i386/cpu.h                |   5
-rw-r--r--  target/i386/tcg/cc_helper.c      |   6
-rw-r--r--  target/i386/tcg/decode-new.c.inc | 152
-rw-r--r--  target/i386/tcg/decode-new.h     |  29
-rw-r--r--  target/i386/tcg/emit.c.inc       | 220
-rw-r--r--  target/i386/tcg/excp_helper.c    |   7
-rw-r--r--  target/i386/tcg/fpu_helper.c     |  10
-rw-r--r--  target/i386/tcg/helper-tcg.h     |   3
-rw-r--r--  target/i386/tcg/int_helper.c     |   8
-rw-r--r--  target/i386/tcg/misc_helper.c    |   4
-rw-r--r--  target/i386/tcg/seg_helper.c     |   8
-rw-r--r--  target/i386/tcg/translate.c      | 160
-rw-r--r--  target/mips/meson.build          |   2
15 files changed, 421 insertions, 197 deletions
diff --git a/target/arm/meson.build b/target/arm/meson.build index d6c3902e67..46b5a21eb3 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -28,7 +28,7 @@ arm_system_ss.add(files( subdir('hvf') -if 'CONFIG_TCG' in config_all +if 'CONFIG_TCG' in config_all_accel subdir('tcg') else arm_ss.add(files('tcg-stubs.c')) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 95d5f16cd5..fd47ee7def 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -738,7 +738,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES) #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ - CPUID_7_1_EAX_FSRC) + CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ef987f344c..7f0786e8b9 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1285,6 +1285,7 @@ typedef enum { CC_OP_NB, } CCOp; +QEMU_BUILD_BUG_ON(CC_OP_NB >= 128); typedef struct SegmentCache { uint32_t selector; @@ -2344,13 +2345,13 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags); -uint32_t cpu_cc_compute_all(CPUX86State *env1, int op); +uint32_t cpu_cc_compute_all(CPUX86State *env1); static inline uint32_t cpu_compute_eflags(CPUX86State *env) { uint32_t eflags = env->eflags; if (tcg_enabled()) { - eflags |= cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK); + eflags |= cpu_cc_compute_all(env) | (env->df & DF_MASK); } return eflags; } diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index c310bd842f..f76e9cb8cf 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -220,9 +220,9 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, } } -uint32_t cpu_cc_compute_all(CPUX86State *env, int op) +uint32_t cpu_cc_compute_all(CPUX86State *env) { - return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op); + return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); } target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, @@ -335,7 +335,7 @@ target_ulong helper_read_eflags(CPUX86State *env) { uint32_t eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); eflags |= (env->df & DF_MASK); eflags |= env->eflags & ~(VM_MASK | RF_MASK); return eflags; diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 2bdbb1bba0..426c459412 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -26,6 +26,13 @@ * size (X86_SIZE_*) codes used in the manual. There are a few differences * though. * + * Operand sizes + * ------------- + * + * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64 + * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the + * "v" or "z" sizes. The decoder simply makes them separate operand sizes. + * * Vector operands * --------------- * @@ -44,6 +51,11 @@ * if the difference is expressed via prefixes. Individual instructions * are separated by prefix in the generator functions. * + * There is a custom size "xh" used to address half of a SSE/AVX operand. + * This points to a 64-bit operand for SSE operations, 128-bit operand + * for 256-bit AVX operands, etc. It is used for conversion operations + * such as VCVTPH2PS or VCVTSS2SD. 
+ * * There are a couple cases in which instructions (e.g. MOVD) write the * whole XMM or MM register but are established incorrectly in the manual * as "d" or "q". These have to be fixed for the decoder to work correctly. @@ -139,10 +151,13 @@ #define cpuid(feat) .cpuid = X86_FEAT_##feat, #define xchg .special = X86_SPECIAL_Locked, +#define lock .special = X86_SPECIAL_HasLock, #define mmx .special = X86_SPECIAL_MMX, -#define zext0 .special = X86_SPECIAL_ZExtOp0, -#define zext2 .special = X86_SPECIAL_ZExtOp2, +#define op0_Rd .special = X86_SPECIAL_Op0_Rd, +#define op2_Ry .special = X86_SPECIAL_Op2_Ry, #define avx_movx .special = X86_SPECIAL_AVXExtMov, +#define sextT0 .special = X86_SPECIAL_SExtT0, +#define zextT0 .special = X86_SPECIAL_ZExtT0, #define vex1 .vex_class = 1, #define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, @@ -523,6 +538,28 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), [0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + + /* + * REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found + * in manual, assumed to be 13 from the VEX.L0 constraint. + */ + [0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + + [0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), }; /* five rows for no prefix, 66, F3, F2, 66+F2 */ @@ -558,8 +595,8 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { [5] = { X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)), {}, - X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)), - X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)), + X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), + X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), {}, }, [6] = { @@ -570,10 +607,10 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { {}, }, [7] = { - X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)), - 
X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)), - X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)), + X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), {}, }, }; @@ -619,13 +656,13 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), [0x06] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), - [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66), - [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66), + [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), + [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66), [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66), [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 chk(W0) cpuid(F16C) p_66), - [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) zext2 p_66), + [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) op2_Ry p_66), [0x21] = X86_OP_GROUP0(VINSERTPS), [0x22] = X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE41) p_66), @@ -1091,10 +1128,6 @@ static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod { int modrm = get_modrm(s, env); if ((modrm >> 6) == 3) { - if (s->prefix & PREFIX_LOCK) { - decode->e.gen = gen_illegal; - return 0xff; - } op->n = (modrm & 7); if (type != X86_TYPE_Q && type != X86_TYPE_N) { op->n |= REX_B(s); @@ -1201,6 +1234,8 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, case X86_TYPE_None: /* Implicit or absent */ case X86_TYPE_A: /* Implicit */ case X86_TYPE_F: /* EFLAGS/RFLAGS */ + case X86_TYPE_X: /* string source */ + case X86_TYPE_Y: /* string destination */ break; case X86_TYPE_B: /* VEX.vvvv selects a GPR */ @@ -1316,43 +1351,15 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, } case X86_TYPE_I: /* Immediate */ - op->unit = X86_OP_IMM; - decode->immediate = insn_get_signed(env, s, op->ot); - break; - case X86_TYPE_J: /* Relative offset for a jump */ op->unit = X86_OP_IMM; decode->immediate = insn_get_signed(env, s, op->ot); - decode->immediate += s->pc - s->cs_base; - if (s->dflag == MO_16) { - decode->immediate &= 0xffff; - } else if (!CODE64(s)) { - decode->immediate &= 0xffffffffu; - } break; case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */ op->n = insn_get(env, s, op->ot) >> 4; break; - case X86_TYPE_X: /* string source */ - op->n = -1; - decode->mem = (AddressParts) { - .def_seg = R_DS, - .base = R_ESI, - .index = -1, - }; - break; - - case X86_TYPE_Y: /* string destination */ - op->n = -1; - decode->mem = (AddressParts) { - .def_seg = R_ES, - .base = R_EDI, - .index = -1, - }; - break; - case X86_TYPE_2op: *op = decode->op[0]; break; @@ -1518,6 +1525,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); case X86_FEAT_SHA_NI: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); + + case X86_FEAT_CMPCCXADD: + return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); } g_assert_not_reached(); } @@ -1677,6 +1687,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) bool first = true; X86DecodedInsn decode; X86DecodeFunc decode_func = decode_root; + uint8_t cc_live; 
s->has_modrm = false; @@ -1830,6 +1841,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } memset(&decode, 0, sizeof(decode)); + decode.cc_op = -1; decode.b = b; if (!decode_insn(s, env, decode_func, &decode)) { goto illegal_op; @@ -1869,19 +1881,22 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) if (decode.op[0].has_ea) { s->prefix |= PREFIX_LOCK; } + decode.e.special = X86_SPECIAL_HasLock; + /* fallthrough */ + case X86_SPECIAL_HasLock: break; - case X86_SPECIAL_ZExtOp0: + case X86_SPECIAL_Op0_Rd: assert(decode.op[0].unit == X86_OP_INT); if (!decode.op[0].has_ea) { decode.op[0].ot = MO_32; } break; - case X86_SPECIAL_ZExtOp2: + case X86_SPECIAL_Op2_Ry: assert(decode.op[2].unit == X86_OP_INT); if (!decode.op[2].has_ea) { - decode.op[2].ot = MO_32; + decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag; } break; @@ -1893,10 +1908,22 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } break; + case X86_SPECIAL_SExtT0: + case X86_SPECIAL_ZExtT0: + /* Handled in gen_load. */ + assert(decode.op[1].unit == X86_OP_INT); + break; + default: break; } + if (s->prefix & PREFIX_LOCK) { + if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) { + goto illegal_op; + } + } + if (!validate_vex(s, &decode)) { return; } @@ -1940,9 +1967,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); } if (s->prefix & PREFIX_LOCK) { - if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) { - goto illegal_op; - } gen_load(s, &decode, 2, s->T1); decode.e.gen(s, env, &decode); } else { @@ -1956,6 +1980,38 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) decode.e.gen(s, env, &decode); gen_writeback(s, &decode, 0, s->T0); } + + /* + * Write back flags after last memory access. Some newer ALU instructions, as + * well as SSE instructions, write flags in the gen_* function, but that can + * cause incorrect tracking of CC_OP for instructions that write to both memory + * and flags. + */ + if (decode.cc_op != -1) { + if (decode.cc_dst) { + tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst); + } + if (decode.cc_src) { + tcg_gen_mov_tl(cpu_cc_src, decode.cc_src); + } + if (decode.cc_src2) { + tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2); + } + if (decode.cc_op == CC_OP_DYNAMIC) { + tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic); + } + set_cc_op(s, decode.cc_op); + cc_live = cc_op_live[decode.cc_op]; + } else { + cc_live = 0; + } + if (decode.cc_op != CC_OP_DYNAMIC) { + assert(!decode.cc_op_dynamic); + assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST)); + assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC)); + assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2)); + } + return; gp_fault: gen_exception_gpf(s); diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index e6c904a319..15e6bfef4b 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -104,6 +104,7 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_CMPCCXADD, X86_FEAT_F16C, X86_FEAT_FMA, X86_FEAT_MOVBE, @@ -158,15 +159,27 @@ typedef enum X86InsnCheck { typedef enum X86InsnSpecial { X86_SPECIAL_None, + /* Accepts LOCK prefix; LOCKed operations do not load or writeback operand 0 */ + X86_SPECIAL_HasLock, + /* Always locked if it has a memory operand (XCHG) */ X86_SPECIAL_Locked, /* - * Register operand 0/2 is zero extended to 32 bits. Rd/Mb or Rd/Mw - * in the manual. 
+ * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits + * (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA + * does not trigger 16-bit writeback and, as a side effect, high-byte + * registers are never used. + */ + X86_SPECIAL_Op0_Rd, + + /* + * Ry/Mb in the manual (PINSRB). However, the high bits are never used by + * the instruction in either the register or memory cases; the *real* effect + * of this modifier is that high-byte registers are never used, even without + * a REX prefix. Therefore, PINSRW does not need it despite having Ry/Mw. */ - X86_SPECIAL_ZExtOp0, - X86_SPECIAL_ZExtOp2, + X86_SPECIAL_Op2_Ry, /* * Register operand 2 is extended to full width, while a memory operand @@ -179,6 +192,10 @@ typedef enum X86InsnSpecial { * become P/P/Q/N, and size "x" becomes "q". */ X86_SPECIAL_MMX, + + /* When loaded into s->T0, register operand 1 is zero/sign extended. */ + X86_SPECIAL_SExtT0, + X86_SPECIAL_ZExtT0, } X86InsnSpecial; /* @@ -267,6 +284,10 @@ struct X86DecodedInsn { target_ulong immediate; AddressParts mem; + TCGv cc_dst, cc_src, cc_src2; + TCGv_i32 cc_op_dynamic; + int8_t cc_op; + uint8_t b; }; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 82da5488d4..6bcf88ecd7 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -55,11 +55,6 @@ static void gen_NM_exception(DisasContext *s) gen_exception(s, EXCP07_PREX); } -static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) -{ - gen_illegal_opcode(s); -} - static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); @@ -237,9 +232,30 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) break; case X86_OP_INT: if (op->has_ea) { - gen_op_ld_v(s, op->ot, v, s->A0); + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0); + } else { + gen_op_ld_v(s, op->ot, v, s->A0); + } + + } else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) { + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8); + } else { + tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8); + } + + } else if (op->ot < MO_TL && v == s->T0 && + (decode->e.special == X86_SPECIAL_SExtT0 || + decode->e.special == X86_SPECIAL_ZExtT0)) { + if (decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN); + } else { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot); + } + } else { - gen_op_mov_v_reg(s, op->ot, v, op->n); + tcg_gen_mov_tl(v, cpu_regs[op->n]); } break; case X86_OP_IMM: @@ -323,6 +339,19 @@ static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) return s->vex_l ? 32 : 16; } +static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_dst = s->T0; + decode->cc_op = op; +} + +static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_src = s->T1; + decode->cc_dst = s->T0; + decode->cc_op = op; +} + static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) { MemOp ot = decode->op[0].ot; @@ -1011,6 +1040,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod VSIB_AVX(VPGATHERD, vpgatherd) VSIB_AVX(VPGATHERQ, vpgatherq) +/* ADCX/ADOX do not have memory operands and can use set_cc_op. 
*/ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) { int opposite_cc_op; @@ -1073,8 +1103,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) MemOp ot = decode->op[0].ot; tcg_gen_andc_tl(s->T0, s->T1, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1089,9 +1118,6 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) * Shifts larger than operand size get zeros. */ tcg_gen_ext8u_tl(s->A0, s->T1); - if (TARGET_LONG_BITS == 64 && ot == MO_32) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } tcg_gen_shr_tl(s->T0, s->T0, s->A0); tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); @@ -1105,10 +1131,10 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); tcg_gen_andc_tl(s->T0, s->T0, s->T1); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } +/* BLSI do not have memory operands and can use set_cc_op. */ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1120,6 +1146,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) set_cc_op(s, CC_OP_BMILGB + ot); } +/* BLSMSK do not have memory operands and can use set_cc_op. */ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1131,6 +1158,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode set_cc_op(s, CC_OP_BMILGB + ot); } +/* BLSR do not have memory operands and can use set_cc_op. */ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1151,18 +1179,119 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_ext8u_tl(s->T1, s->T1); + tcg_gen_shl_tl(s->A0, mone, s->T1); + tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); /* * Note that since we're using BMILG (in order to get O * cleared) we need to store the inverse into C. 
*/ - tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); + tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound); + prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); +} + +static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *label_top = gen_new_label(); + TCGLabel *label_bottom = gen_new_label(); + TCGv oldv = tcg_temp_new(); + TCGv newv = tcg_temp_new(); + TCGv cmpv = tcg_temp_new(); + TCGCond cond; + + TCGv cmp_lhs, cmp_rhs; + MemOp ot, ot_full; + + int jcc_op = (decode->b >> 1) & 7; + static const TCGCond cond_table[8] = { + [JCC_O] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_B] = TCG_COND_LTU, + [JCC_Z] = TCG_COND_EQ, + [JCC_BE] = TCG_COND_LEU, + [JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */ + [JCC_L] = TCG_COND_LT, + [JCC_LE] = TCG_COND_LE, + }; - tcg_gen_shl_tl(s->A0, mone, s->T1); - tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); + cond = cond_table[jcc_op]; + if (decode->b & 1) { + cond = tcg_invert_cond(cond); + } - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_BMILGB + ot); + ot = decode->op[0].ot; + ot_full = ot | MO_LE; + if (jcc_op >= JCC_S) { + /* + * Sign-extend values before subtracting for S, P (zero/sign extension + * does not matter there) L, LE and their inverses. + */ + ot_full |= MO_SIGN; + } + + /* + * cmpv will be moved to cc_src *after* cpu_regs[] is written back, so use + * tcg_gen_ext_tl instead of gen_ext_tl. + */ + tcg_gen_ext_tl(cmpv, cpu_regs[decode->op[1].n], ot_full); + + /* + * Cmpxchg loop starts here. + * - s->T1: addition operand (from decoder) + * - s->A0: dest address (from decoder) + * - s->cc_srcT: memory operand (lhs for comparison) + * - cmpv: rhs for comparison + */ + gen_set_label(label_top); + gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0); + tcg_gen_sub_tl(s->T0, s->cc_srcT, cmpv); + + /* Compute the comparison result by hand, to avoid clobbering cc_*. */ + switch (jcc_op) { + case JCC_O: + /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */ + tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); + tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); + tcg_gen_and_tl(s->tmp0, s->tmp0, newv); + tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_P: + tcg_gen_ext8u_tl(s->tmp0, s->T0); + tcg_gen_ctpop_tl(s->tmp0, s->tmp0); + tcg_gen_andi_tl(s->tmp0, s->tmp0, 1); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_S: + tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + default: + cmp_lhs = s->cc_srcT, cmp_rhs = cmpv; + break; + } + + /* Compute new value: if condition does not hold, just store back s->cc_srcT */ + tcg_gen_add_tl(newv, s->cc_srcT, s->T1); + tcg_gen_movcond_tl(cond, newv, cmp_lhs, cmp_rhs, newv, s->cc_srcT); + tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, s->cc_srcT, newv, s->mem_index, ot_full); + + /* Exit unconditionally if cmpxchg succeeded. */ + tcg_gen_brcond_tl(TCG_COND_EQ, oldv, s->cc_srcT, label_bottom); + + /* Try again if there was actually a store to make. */ + tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, label_top); + gen_set_label(label_bottom); + + /* Store old value to registers only after a successful store. 
*/ + gen_writeback(s, decode, 1, s->cc_srcT); + + decode->cc_dst = s->T0; + decode->cc_src = cmpv; + decode->cc_op = CC_OP_SUBB + ot; } static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1242,9 +1371,7 @@ static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); - gen_extu(s->aflag, s->A0); - gen_add_A0_ds_seg(s); + gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override); if (s->prefix & PREFIX_DATA) { gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); @@ -1355,7 +1482,8 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) /* low part of result in VEX.vvvv, high in MODRM */ switch (ot) { - default: + case MO_32: +#ifdef TARGET_X86_64 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, @@ -1363,13 +1491,15 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); break; -#ifdef TARGET_X86_64 + case MO_64: - tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); - break; #endif - } + tcg_gen_mulu2_tl(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); + break; + default: + g_assert_not_reached(); + } } static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -1432,19 +1562,11 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - MemOp ot = decode->op[1].ot; - if (ot < MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } gen_helper_pdep(s->T0, s->T0, s->T1); } static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - MemOp ot = decode->op[1].ot; - if (ot < MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } gen_helper_pext(s->T0, s->T0, s->T1); } @@ -1772,14 +1894,24 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - int b = decode->immediate; + int mask = ot == MO_64 ? 63 : 31; + int b = decode->immediate & mask; - if (ot == MO_64) { - tcg_gen_rotri_tl(s->T0, s->T0, b & 63); - } else { + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + break; + + case MO_64: +#endif + tcg_gen_rotri_tl(s->T0, s->T0, b); + break; + + default: + g_assert_not_reached(); } } @@ -1790,9 +1922,6 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) mask = ot == MO_64 ? 63 : 31; tcg_gen_andi_tl(s->T1, s->T1, mask); - if (ot != MO_64) { - tcg_gen_ext32s_tl(s->T0, s->T0); - } tcg_gen_sar_tl(s->T0, s->T0, s->T1); } @@ -1867,9 +1996,6 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) mask = ot == MO_64 ? 
63 : 31; tcg_gen_andi_tl(s->T1, s->T1, mask); - if (ot != MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } tcg_gen_shr_tl(s->T0, s->T0, s->T1); } diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index 7c3c8dc7fe..65e37ae2a0 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -28,7 +28,7 @@ G_NORETURN void helper_raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) { - raise_interrupt(env, intno, 1, 0, next_eip_addend); + raise_interrupt(env, intno, next_eip_addend); } G_NORETURN void helper_raise_exception(CPUX86State *env, int exception_index) @@ -112,10 +112,9 @@ void raise_interrupt2(CPUX86State *env, int intno, /* shortcuts to generate exceptions */ -G_NORETURN void raise_interrupt(CPUX86State *env, int intno, int is_int, - int error_code, int next_eip_addend) +G_NORETURN void raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) { - raise_interrupt2(env, intno, is_int, error_code, next_eip_addend, 0); + raise_interrupt2(env, intno, 1, 0, next_eip_addend, 0); } G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 4430d3d380..4b965a5d6c 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -484,9 +484,8 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; merge_exception_flags(env, old_flags); } @@ -497,9 +496,8 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; merge_exception_flags(env, old_flags); } diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index cd1723389a..ce34b737bb 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -65,8 +65,7 @@ G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, int error_code); G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index, int error_code, uintptr_t retaddr); -G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int is_int, - int error_code, int next_eip_addend); +G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int next_eip_addend); G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr, MMUAccessType access_type, uintptr_t retaddr); diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 05418f181f..ab85dc5540 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -190,7 +190,7 @@ void helper_aaa(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -214,7 +214,7 @@ void helper_aas(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -237,7 +237,7 @@ 
void helper_daa(CPUX86State *env) int old_al, al, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; old_al = al = env->regs[R_EAX] & 0xff; @@ -264,7 +264,7 @@ void helper_das(CPUX86State *env) int al, al1, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c index babff06186..b0f0f7b893 100644 --- a/target/i386/tcg/misc_helper.c +++ b/target/i386/tcg/misc_helper.c @@ -41,9 +41,9 @@ void helper_into(CPUX86State *env, int next_eip_addend) { int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if (eflags & CC_O) { - raise_interrupt(env, EXCP04_INTO, 1, 0, next_eip_addend); + raise_interrupt(env, EXCP04_INTO, next_eip_addend); } } diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index eb29a1fd4e..34ccabd8ce 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -2230,7 +2230,7 @@ target_ulong helper_lsl(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2277,7 +2277,7 @@ target_ulong helper_lar(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2326,7 +2326,7 @@ void helper_verr(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2364,7 +2364,7 @@ void helper_verw(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 037bc47e7c..e1eb82a5c6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -122,6 +122,7 @@ typedef struct DisasContext { int cpuid_ext3_features; int cpuid_7_0_ebx_features; int cpuid_7_0_ecx_features; + int cpuid_7_1_eax_features; int cpuid_xsave_features; /* TCG local temps */ @@ -522,9 +523,9 @@ void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val) gen_op_mov_reg_v(s, size, reg, s->tmp0); } -static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg) +static inline void gen_op_add_reg(DisasContext *s, MemOp size, int reg, TCGv val) { - tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0); + tcg_gen_add_tl(s->tmp0, cpu_regs[reg], val); gen_op_mov_reg_v(s, size, reg, s->tmp0); } @@ -635,17 +636,17 @@ static TCGv eip_cur_tl(DisasContext *s) } } -/* Compute SEG:REG into A0. SEG is selected from the override segment +/* Compute SEG:REG into DEST. SEG is selected from the override segment (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to indicate no override. 
*/ -static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, - int def_seg, int ovr_seg) +static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0, + int def_seg, int ovr_seg) { switch (aflag) { #ifdef TARGET_X86_64 case MO_64: if (ovr_seg < 0) { - tcg_gen_mov_tl(s->A0, a0); + tcg_gen_mov_tl(dest, a0); return; } break; @@ -656,14 +657,14 @@ static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, ovr_seg = def_seg; } if (ovr_seg < 0) { - tcg_gen_ext32u_tl(s->A0, a0); + tcg_gen_ext32u_tl(dest, a0); return; } break; case MO_16: /* 16 bit address */ - tcg_gen_ext16u_tl(s->A0, a0); - a0 = s->A0; + tcg_gen_ext16u_tl(dest, a0); + a0 = dest; if (ovr_seg < 0) { if (ADDSEG(s)) { ovr_seg = def_seg; @@ -680,17 +681,23 @@ static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, TCGv seg = cpu_seg_base[ovr_seg]; if (aflag == MO_64) { - tcg_gen_add_tl(s->A0, a0, seg); + tcg_gen_add_tl(dest, a0, seg); } else if (CODE64(s)) { - tcg_gen_ext32u_tl(s->A0, a0); - tcg_gen_add_tl(s->A0, s->A0, seg); + tcg_gen_ext32u_tl(dest, a0); + tcg_gen_add_tl(dest, dest, seg); } else { - tcg_gen_add_tl(s->A0, a0, seg); - tcg_gen_ext32u_tl(s->A0, s->A0); + tcg_gen_add_tl(dest, a0, seg); + tcg_gen_ext32u_tl(dest, dest); } } } +static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, + int def_seg, int ovr_seg) +{ + gen_lea_v_seg_dest(s, aflag, s->A0, a0, def_seg, ovr_seg); +} + static inline void gen_string_movl_A0_ESI(DisasContext *s) { gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override); @@ -701,10 +708,12 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s) gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); } -static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot) +static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot) { - tcg_gen_ld32s_tl(s->T0, tcg_env, offsetof(CPUX86State, df)); - tcg_gen_shli_tl(s->T0, s->T0, ot); + TCGv dshift = tcg_temp_new(); + tcg_gen_ld32s_tl(dshift, tcg_env, offsetof(CPUX86State, df)); + tcg_gen_shli_tl(dshift, dshift, ot); + return dshift; }; static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) @@ -712,6 +721,9 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) if (size == MO_TL) { return src; } + if (!dst) { + dst = tcg_temp_new(); + } tcg_gen_ext_tl(dst, src, size | (sign ? 
MO_SIGN : 0)); return dst; } @@ -728,9 +740,9 @@ static void gen_exts(MemOp ot, TCGv reg) static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1) { - tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(s->aflag, s->tmp0); - tcg_gen_brcondi_tl(cond, s->tmp0, 0, label1); + TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false); + + tcg_gen_brcondi_tl(cond, tmp, 0, label1); } static inline void gen_op_jz_ecx(DisasContext *s, TCGLabel *label1) @@ -812,13 +824,16 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port, static void gen_movs(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_op_update1_cc(DisasContext *s) @@ -851,22 +866,22 @@ static void gen_op_update_neg_cc(DisasContext *s) tcg_gen_movi_tl(s->cc_srcT, 0); } -/* compute all eflags to cc_src */ -static void gen_compute_eflags(DisasContext *s) +/* compute all eflags to reg */ +static void gen_mov_eflags(DisasContext *s, TCGv reg) { - TCGv zero, dst, src1, src2; + TCGv dst, src1, src2; + TCGv_i32 cc_op; int live, dead; if (s->cc_op == CC_OP_EFLAGS) { + tcg_gen_mov_tl(reg, cpu_cc_src); return; } if (s->cc_op == CC_OP_CLR) { - tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P); - set_cc_op(s, CC_OP_EFLAGS); + tcg_gen_movi_tl(reg, CC_Z | CC_P); return; } - zero = NULL; dst = cpu_cc_dst; src1 = cpu_cc_src; src2 = cpu_cc_src2; @@ -875,7 +890,7 @@ static void gen_compute_eflags(DisasContext *s) live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); if (dead) { - zero = tcg_constant_tl(0); + TCGv zero = tcg_constant_tl(0); if (dead & USES_CC_DST) { dst = zero; } @@ -887,8 +902,18 @@ static void gen_compute_eflags(DisasContext *s) } } - gen_update_cc_op(s); - gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op); + if (s->cc_op != CC_OP_DYNAMIC) { + cc_op = tcg_constant_i32(s->cc_op); + } else { + cc_op = cpu_cc_op; + } + gen_helper_cc_compute_all(reg, dst, src1, src2, cc_op); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(DisasContext *s) +{ + gen_mov_eflags(s, cpu_cc_src); set_cc_op(s, CC_OP_EFLAGS); } @@ -1020,6 +1045,9 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + case CC_OP_MULB ... 
CC_OP_MULQ: + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = -1 }; default: gen_compute_eflags(s); return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, @@ -1126,10 +1154,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S }; + .mask = CC_O }; break; default: case JCC_LE: @@ -1137,10 +1164,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S | CC_Z }; + .mask = CC_O | CC_Z }; break; } break; @@ -1239,11 +1265,9 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s) static void gen_stos(DisasContext *s, MemOp ot) { - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } static void gen_lods(DisasContext *s, MemOp ot) @@ -1251,28 +1275,33 @@ static void gen_lods(DisasContext *s, MemOp ot) gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_op_mov_reg_v(s, ot, R_EAX, s->T0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); } static void gen_scas(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); - gen_op(s, OP_CMPL, ot, R_EAX); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(s->cc_srcT, s->T0); + tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); + set_cc_op(s, CC_OP_SUBB + ot); + + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } static void gen_cmps(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); gen_string_movl_A0_ESI(s); gen_op(s, OP_CMPL, ot, OR_TMP0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) @@ -1300,8 +1329,7 @@ static void gen_ins(DisasContext *s, MemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); gen_helper_in_func(ot, s->T0, s->tmp2_i32); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); } @@ -1314,8 +1342,7 @@ static void gen_outs(DisasContext *s, MemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); } @@ -2474,14 +2501,10 @@ static void gen_jcc(DisasContext *s, int b, int diff) gen_jmp_rel(s, s->dflag, diff, 0); } -static void gen_cmovcc1(CPUX86State 
*env, DisasContext *s, MemOp ot, int b, - int modrm, int reg) +static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src) { - CCPrepare cc; + CCPrepare cc = gen_prepare_cc(s, b, s->T1); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - - cc = gen_prepare_cc(s, b, s->T1); if (cc.mask != -1) { TCGv t0 = tcg_temp_new(); tcg_gen_andi_tl(t0, cc.reg, cc.mask); @@ -2491,9 +2514,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b, cc.reg2 = tcg_constant_tl(cc.imm); } - tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2, - s->T0, cpu_regs[reg]); - gen_op_mov_reg_v(s, ot, reg, s->T0); + tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest); } static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg) @@ -2560,7 +2581,7 @@ static void gen_push_v(DisasContext *s, TCGv val) if (!CODE64(s)) { if (ADDSEG(s)) { - new_esp = s->tmp4; + new_esp = tcg_temp_new(); tcg_gen_mov_tl(new_esp, s->A0); } gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); @@ -2575,8 +2596,8 @@ static MemOp gen_pop_T0(DisasContext *s) { MemOp d_ot = mo_pushpop(s, s->dflag); - gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); - gen_op_ld_v(s, d_ot, s->T0, s->A0); + gen_lea_v_seg_dest(s, mo_stacksize(s), s->T0, cpu_regs[R_ESP], R_SS, -1); + gen_op_ld_v(s, d_ot, s->T0, s->T0); return d_ot; } @@ -4182,7 +4203,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]); tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]); tcg_gen_add_tl(s->A0, s->A0, s->T0); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); gen_op_ld_v(s, MO_8, s->T0, s->A0); gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); @@ -4930,6 +4950,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) case 0xaa: /* stosS */ case 0xab: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_stos(s, ot); } else { @@ -4948,6 +4969,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) case 0xae: /* scasS */ case 0xaf: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & PREFIX_REPNZ) { gen_repz_scas(s, ot, 1); } else if (prefixes & PREFIX_REPZ) { @@ -5201,7 +5223,9 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) ot = dflag; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_cmovcc1(env, s, ot, b, modrm, reg); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; /************************/ @@ -5834,7 +5858,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_update_cc_op(s); gen_update_eip_cur(s); tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); gen_helper_monitor(tcg_env, s->A0); break; @@ -6941,6 +6964,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX]; dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX]; dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX]; + dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX]; dc->cpuid_xsave_features = env->features[FEAT_XSAVE]; dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) || (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); diff --git a/target/mips/meson.build b/target/mips/meson.build index e57ef24ecf..a26d1e1f79 100644 --- a/target/mips/meson.build +++ b/target/mips/meson.build @@ -12,7 +12,7 @@ if have_system subdir('sysemu') endif -if 'CONFIG_TCG' in config_all +if 'CONFIG_TCG' in 
config_all_accel subdir('tcg') endif |