author     Paolo Bonzini <pbonzini@redhat.com>    2024-06-10 10:39:00 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>    2024-10-17 19:41:29 +0200
commit     fcd16539ebfe2c9e4cd568e6ce5da566a027a524 (patch)
tree       15b2a3d9d0cbfb1fbb015fef2dfe5e4c3af218d8 /target/i386
parent     a2e2c78d2af0cfcc3d59542e70503c56d9ae7369 (diff)
target/i386: convert CMPXCHG8B/CMPXCHG16B to new decoder
The gen_cmpxchg8b and gen_cmpxchg16b functions already have the correct
prototype; the only change needed is removing the gen_lea_modrm() call.
This moves the last LOCK-enabled instructions to the new decoder. It is
now possible to assume that gen_multi0F is called only after checking
that PREFIX_LOCK was not specified.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
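
For readers who do not have the instruction's behaviour in mind, here is a
minimal reference sketch of what CMPXCHG8B does architecturally (plain C,
illustration only; the helper name and signature are invented for this sketch
and are not part of QEMU). CMPXCHG16B behaves the same way with 128-bit
operands and RDX:RAX / RCX:RBX; with a LOCK prefix the whole compare-and-
exchange is performed atomically.

#include <stdbool.h>
#include <stdint.h>

/* Reference semantics of CMPXCHG8B m64 (illustrative helper, not QEMU code).
 * Returns the value of ZF after the instruction. */
static bool cmpxchg8b_semantics(uint64_t *m64, uint32_t *eax, uint32_t *edx,
                                uint32_t ebx, uint32_t ecx)
{
    uint64_t expected = ((uint64_t)*edx << 32) | *eax;   /* EDX:EAX */
    uint64_t desired  = ((uint64_t)ecx  << 32) | ebx;    /* ECX:EBX */

    if (*m64 == expected) {
        *m64 = desired;             /* success: store ECX:EBX, ZF = 1 */
        return true;
    }
    *eax = (uint32_t)*m64;          /* failure: load old value into EDX:EAX, ZF = 0 */
    *edx = (uint32_t)(*m64 >> 32);
    return false;
}

Note that on success the memory operand already equals EDX:EAX, which is why
the TCG implementation below can write the result back to the register pair
unconditionally in the 32-bit case.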
Diffstat (limited to 'target/i386')
-rw-r--r--   target/i386/tcg/decode-new.c.inc    34
-rw-r--r--   target/i386/tcg/decode-new.h         2
-rw-r--r--   target/i386/tcg/emit.c.inc          96
-rw-r--r--   target/i386/tcg/translate.c        121
4 files changed, 124 insertions, 129 deletions
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index ee3ba16116..fe3bfed147 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -288,6 +288,25 @@ static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
     }
 }
 
+static void decode_group9(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry group9_reg =
+        X86_OP_ENTRY0(multi0F);                                 /* unconverted */
+    static const X86OpEntry cmpxchg8b =
+        X86_OP_ENTRY1(CMPXCHG8B,  M,q,  lock p_00 cpuid(CX8));
+    static const X86OpEntry cmpxchg16b =
+        X86_OP_ENTRY1(CMPXCHG16B, M,dq, lock p_00 cpuid(CX16));
+
+    int modrm = get_modrm(s, env);
+    int op = (modrm >> 3) & 7;
+
+    if ((modrm >> 6) == 3) {
+        *entry = group9_reg;
+    } else if (op == 1) {
+        *entry = REX_W(s) ? cmpxchg16b : cmpxchg8b;
+    }
+}
+
 static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
 {
     static const X86OpEntry group15_reg[8] = {
@@ -1203,7 +1222,7 @@ static const X86OpEntry opcodes_0F[256] = {
     [0xc4] = X86_OP_ENTRY4(PINSRW,  V,dq, H,dq, E,w,  vex5 mmx p_00_66),
     [0xc5] = X86_OP_ENTRY3(PEXTRW,  G,d,  U,dq, I,b,  vex5 mmx p_00_66),
     [0xc6] = X86_OP_ENTRY4(VSHUF,   V,x,  H,x,  W,x,  vex4 p_00_66),
-    [0xc7] = X86_OP_ENTRY1(multi0F, nop,v,            nolea), /* unconverted */
+    [0xc7] = X86_OP_GROUP0(group9),
 
     [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x,  H,x,  W,x,  vex2 cpuid(SSE3) p_66_f2),
     [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x,  H,x,  W,x,  vex4 mmx avx2_256 p_00_66),
@@ -2245,8 +2264,12 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
         return (s->cpuid_features & CPUID_CMOV);
     case X86_FEAT_CLFLUSH:
         return (s->cpuid_features & CPUID_CLFLUSH);
+    case X86_FEAT_CX8:
+        return (s->cpuid_features & CPUID_CX8);
     case X86_FEAT_FXSR:
         return (s->cpuid_features & CPUID_FXSR);
+    case X86_FEAT_CX16:
+        return (s->cpuid_ext_features & CPUID_EXT_CX16);
     case X86_FEAT_F16C:
         return (s->cpuid_ext_features & CPUID_EXT_F16C);
     case X86_FEAT_FMA:
@@ -2726,15 +2749,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
         break;
     }
 
-    /*
-     * hack for old decoder: 0F C7 has both instructions that accept LOCK
-     * and instructions that don't, but also needs X86_SPECIAL_NoLoadEA.
-     * Keep this here until CMPXCHG8B/CMPXCHG16B is separated from the
-     * other unconverted opcodes.
-     */
-    if (decode.e.gen == gen_multi0F) {
-        accept_lock = true;
-    }
     if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
         goto illegal_op;
     }
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index bebc77bd54..7f23d373ea 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -114,6 +114,8 @@ typedef enum X86CPUIDFeature {
     X86_FEAT_CLWB,
     X86_FEAT_CMOV,
     X86_FEAT_CMPCCXADD,
+    X86_FEAT_CX8,
+    X86_FEAT_CX16,
     X86_FEAT_F16C,
     X86_FEAT_FMA,
     X86_FEAT_FSGSBASE,
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 29de8bba6f..fd17a9b1ec 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1788,6 +1788,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
+static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
+{
+#ifdef TARGET_X86_64
+    MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+    TCGv_i64 t0, t1;
+    TCGv_i128 cmp, val;
+
+    cmp = tcg_temp_new_i128();
+    val = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+    /* Only require atomic with LOCK; non-parallel handled in generator. */
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+    } else {
+        tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+    }
+
+    tcg_gen_extr_i128_i64(s->T0, s->T1, val);
+
+    /* Determine success after the fact. */
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+    tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+    tcg_gen_or_i64(t0, t0, t1);
+
+    /* Update Z. */
+    gen_compute_eflags(s);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+
+    /*
+     * Extract the result values for the register pair.  We may do this
+     * unconditionally, because on success (Z=1), the old value matches
+     * the previous value in RDX:RAX.
+     */
+    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
+    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
+#else
+    abort();
+#endif
+}
+
+static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv_i64 cmp, val, old;
+    TCGv Z;
+
+    cmp = tcg_temp_new_i64();
+    val = tcg_temp_new_i64();
+    old = tcg_temp_new_i64();
+
+    /* Construct the comparison values from the register pair. */
+    tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+    /* Only require atomic with LOCK; non-parallel handled in generator. */
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
+    } else {
+        tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
+                                      s->mem_index, MO_TEUQ);
+    }
+
+    /* Set tmp0 to match the required value of Z. */
+    tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
+    Z = tcg_temp_new();
+    tcg_gen_trunc_i64_tl(Z, cmp);
+
+    /*
+     * Extract the result values for the register pair.
+     * For 32-bit, we may do this unconditionally, because on success (Z=1),
+     * the old value matches the previous value in EDX:EAX.  For x86_64,
+     * the store must be conditional, because we must leave the source
+     * registers unchanged on success, and zero-extend the writeback
+     * on failure (Z=0).
+     */
+    if (TARGET_LONG_BITS == 32) {
+        tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
+    } else {
+        TCGv zero = tcg_constant_tl(0);
+
+        tcg_gen_extr_i64_tl(s->T0, s->T1, old);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
+                           s->T0, cpu_regs[R_EAX]);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
+                           s->T1, cpu_regs[R_EDX]);
+    }
+
+    /* Update Z. */
+    gen_compute_eflags(s);
+    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
+}
+
 static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index be68cde1ba..1d3b5f35c3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2289,104 +2289,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
     tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
 }
 
-static void gen_cmpxchg8b(DisasContext *s, X86DecodedInsn *decode)
-{
-    TCGv_i64 cmp, val, old;
-    TCGv Z;
-
-    gen_lea_modrm(s, decode);
-
-    cmp = tcg_temp_new_i64();
-    val = tcg_temp_new_i64();
-    old = tcg_temp_new_i64();
-
-    /* Construct the comparison values from the register pair. */
-    tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
-    tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
-
-    /* Only require atomic with LOCK; non-parallel handled in generator. */
-    if (s->prefix & PREFIX_LOCK) {
-        tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
-    } else {
-        tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
-                                      s->mem_index, MO_TEUQ);
-    }
-
-    /* Set tmp0 to match the required value of Z. */
-    tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
-    Z = tcg_temp_new();
-    tcg_gen_trunc_i64_tl(Z, cmp);
-
-    /*
-     * Extract the result values for the register pair.
-     * For 32-bit, we may do this unconditionally, because on success (Z=1),
-     * the old value matches the previous value in EDX:EAX.  For x86_64,
-     * the store must be conditional, because we must leave the source
-     * registers unchanged on success, and zero-extend the writeback
-     * on failure (Z=0).
-     */
-    if (TARGET_LONG_BITS == 32) {
-        tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
-    } else {
-        TCGv zero = tcg_constant_tl(0);
-
-        tcg_gen_extr_i64_tl(s->T0, s->T1, old);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
-                           s->T0, cpu_regs[R_EAX]);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
-                           s->T1, cpu_regs[R_EDX]);
-    }
-
-    /* Update Z. */
-    gen_compute_eflags(s);
-    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
-}
-
-#ifdef TARGET_X86_64
-static void gen_cmpxchg16b(DisasContext *s, X86DecodedInsn *decode)
-{
-    MemOp mop = MO_TE | MO_128 | MO_ALIGN;
-    TCGv_i64 t0, t1;
-    TCGv_i128 cmp, val;
-
-    gen_lea_modrm(s, decode);
-
-    cmp = tcg_temp_new_i128();
-    val = tcg_temp_new_i128();
-    tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
-    tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
-
-    /* Only require atomic with LOCK; non-parallel handled in generator. */
-    if (s->prefix & PREFIX_LOCK) {
-        tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
-    } else {
-        tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
-    }
-
-    tcg_gen_extr_i128_i64(s->T0, s->T1, val);
-
-    /* Determine success after the fact. */
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
-    tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
-    tcg_gen_or_i64(t0, t0, t1);
-
-    /* Update Z. */
-    gen_compute_eflags(s);
-    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
-    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
-
-    /*
-     * Extract the result values for the register pair.  We may do this
-     * unconditionally, because on success (Z=1), the old value matches
-     * the previous value in RDX:RAX.
-     */
-    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
-    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
-}
-#endif
-
 #include "emit.c.inc"
 
 static void gen_x87(DisasContext *s, X86DecodedInsn *decode)
@@ -2962,29 +2864,10 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
     /* now check op code */
     switch (b) {
-    case 0x1c7: /* cmpxchg8b */
+    case 0x1c7: /* RDSEED, RDPID with f3 prefix */
         mod = (modrm >> 6) & 3;
         switch ((modrm >> 3) & 7) {
-        case 1: /* CMPXCHG8, CMPXCHG16 */
-            if (mod == 3) {
-                goto illegal_op;
-            }
-#ifdef TARGET_X86_64
-            if (dflag == MO_64) {
-                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
-                    goto illegal_op;
-                }
-                gen_cmpxchg16b(s, decode);
-                break;
-            }
-#endif
-            if (!(s->cpuid_features & CPUID_CX8)) {
-                goto illegal_op;
-            }
-            gen_cmpxchg8b(s, decode);
-            break;
-
-        case 7: /* RDSEED, RDPID with f3 prefix */
+        case 7:
             if (mod != 3 || (s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
                 goto illegal_op;