aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <rth@twiddle.net>2015-07-02 14:53:40 +0100
committerRichard Henderson <rth@twiddle.net>2016-02-13 07:59:59 +1100
commit19dc85dba23c0db1ca932c62e453c37e00761628 (patch)
tree9ac16b50d5d1b11c9533c6a85e336c1274d1ba38
parent121f3157887f92268a3d6169e2d4601f9292020b (diff)
target-i386: Add XSAVE extension
This includes XSAVE, XRSTOR, XGETBV, XSETBV, which are all related, as well as the associate cpuid bits. Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r--target-i386/cpu.c39
-rw-r--r--target-i386/fpu_helper.c145
-rw-r--r--target-i386/helper.c12
-rw-r--r--target-i386/helper.h4
-rw-r--r--target-i386/kvm.c11
-rw-r--r--target-i386/translate.c54
6 files changed, 241 insertions, 24 deletions
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 3fa14bf171..fb8a646a20 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -331,14 +331,14 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \
CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
- CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
- CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
- CPUID_EXT_RDRAND */
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
+ CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#ifdef TARGET_X86_64
#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@@ -2323,10 +2323,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = (cpu->apic_id << 24) |
8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
*ecx = env->features[FEAT_1_ECX];
+ if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
+ *ecx |= CPUID_EXT_OSXSAVE;
+ }
*edx = env->features[FEAT_1_EDX];
if (cs->nr_cores * cs->nr_threads > 1) {
*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
- *edx |= 1 << 28; /* HTT bit */
+ *edx |= CPUID_HT;
}
break;
case 2:
@@ -2450,7 +2453,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0xD: {
KVMState *s = cs->kvm_state;
- uint64_t kvm_mask;
+ uint64_t ena_mask;
int i;
/* Processor Extended State */
@@ -2458,35 +2461,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = 0;
*ecx = 0;
*edx = 0;
- if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) {
+ if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
break;
}
- kvm_mask =
- kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) |
- ((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32);
+ if (kvm_enabled()) {
+ ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
+ ena_mask <<= 32;
+ ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+ } else {
+ ena_mask = -1;
+ }
if (count == 0) {
*ecx = 0x240;
for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) {
const ExtSaveArea *esa = &ext_save_areas[i];
- if ((env->features[esa->feature] & esa->bits) == esa->bits &&
- (kvm_mask & (1 << i)) != 0) {
+ if ((env->features[esa->feature] & esa->bits) == esa->bits
+ && ((ena_mask >> i) & 1) != 0) {
if (i < 32) {
- *eax |= 1 << i;
+ *eax |= 1u << i;
} else {
- *edx |= 1 << (i - 32);
+ *edx |= 1u << (i - 32);
}
*ecx = MAX(*ecx, esa->offset + esa->size);
}
}
- *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE);
+ *eax |= ena_mask & (XSTATE_FP | XSTATE_SSE);
*ebx = *ecx;
} else if (count == 1) {
*eax = env->features[FEAT_XSAVE];
} else if (count < ARRAY_SIZE(ext_save_areas)) {
const ExtSaveArea *esa = &ext_save_areas[count];
- if ((env->features[esa->feature] & esa->bits) == esa->bits &&
- (kvm_mask & (1 << count)) != 0) {
+ if ((env->features[esa->feature] & esa->bits) == esa->bits
+ && ((ena_mask >> count) & 1) != 0) {
*eax = esa->size;
*ebx = esa->offset;
}
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index a7da370c5d..159e1df185 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -1190,6 +1190,45 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr)
}
}
+static uint64_t get_xinuse(CPUX86State *env)
+{
+ /* We don't track XINUSE. We could calculate it here, but it's
+ probably less work to simply indicate all components in use. */
+ return -1;
+}
+
+void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+ uintptr_t ra = GETPC();
+ uint64_t old_bv, new_bv;
+
+ /* The OS must have enabled XSAVE. */
+ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+ raise_exception_ra(env, EXCP06_ILLOP, ra);
+ }
+
+ /* The operand must be 64 byte aligned. */
+ if (ptr & 63) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ /* Never save anything not enabled by XCR0. */
+ rfbm &= env->xcr0;
+
+ if (rfbm & XSTATE_FP) {
+ do_xsave_fpu(env, ptr, ra);
+ }
+ if (rfbm & XSTATE_SSE) {
+ do_xsave_mxcsr(env, ptr, ra);
+ do_xsave_sse(env, ptr, ra);
+ }
+
+ /* Update the XSTATE_BV field. */
+ old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+ new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm);
+ cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
+}
+
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
int i, fpus, fptag;
@@ -1259,6 +1298,112 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
}
}
+void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+ uintptr_t ra = GETPC();
+ uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
+
+ rfbm &= env->xcr0;
+
+ /* The OS must have enabled XSAVE. */
+ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+ raise_exception_ra(env, EXCP06_ILLOP, ra);
+ }
+
+ /* The operand must be 64 byte aligned. */
+ if (ptr & 63) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+
+ if ((int64_t)xstate_bv < 0) {
+ /* FIXME: Compact form. */
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ /* Standard form. */
+
+ /* The XSTATE field must not set bits not present in XCR0. */
+ if (xstate_bv & ~env->xcr0) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ /* The XCOMP field must be zero. */
+ xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
+ xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
+ if (xcomp_bv0 || xcomp_bv1) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ if (rfbm & XSTATE_FP) {
+ if (xstate_bv & XSTATE_FP) {
+ do_xrstor_fpu(env, ptr, ra);
+ } else {
+ helper_fninit(env);
+ memset(env->fpregs, 0, sizeof(env->fpregs));
+ }
+ }
+ if (rfbm & XSTATE_SSE) {
+ /* Note that the standard form of XRSTOR loads MXCSR from memory
+ whether or not the XSTATE_BV bit is set. */
+ do_xrstor_mxcsr(env, ptr, ra);
+ if (xstate_bv & XSTATE_SSE) {
+ do_xrstor_sse(env, ptr, ra);
+ } else {
+ /* ??? When AVX is implemented, we may have to be more
+ selective in the clearing. */
+ memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
+ }
+ }
+}
+
+uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
+{
+ /* The OS must have enabled XSAVE. */
+ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+ raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+ }
+
+ switch (ecx) {
+ case 0:
+ return env->xcr0;
+ case 1:
+ /* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2]. */
+ return env->xcr0 & get_xinuse(env);
+ }
+ raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
+void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
+{
+ uint32_t dummy, ena_lo, ena_hi;
+ uint64_t ena;
+
+ /* The OS must have enabled XSAVE. */
+ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+ raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+ }
+
+ /* Only XCR0 is defined at present; the FPU may not be disabled. */
+ if (ecx != 0 || (mask & XSTATE_FP) == 0) {
+ goto do_gpf;
+ }
+
+ /* Disallow enabling unimplemented features. */
+ cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
+ ena = ((uint64_t)ena_hi << 32) | ena_lo;
+ if (mask & ~ena) {
+ goto do_gpf;
+ }
+
+ env->xcr0 = mask;
+ return;
+
+ do_gpf:
+ raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
CPU_LDoubleU temp;
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 3802ed9359..618bcff695 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -647,6 +647,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
{
X86CPU *cpu = x86_env_get_cpu(env);
+ uint32_t hflags;
#if defined(DEBUG_MMU)
printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@@ -656,24 +657,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
CR4_SMEP_MASK | CR4_SMAP_MASK)) {
tlb_flush(CPU(cpu), 1);
}
+
+ /* Clear bits we're going to recompute. */
+ hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK);
+
/* SSE handling */
if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
new_cr4 &= ~CR4_OSFXSR_MASK;
}
- env->hflags &= ~HF_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK) {
- env->hflags |= HF_OSFXSR_MASK;
+ hflags |= HF_OSFXSR_MASK;
}
if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
new_cr4 &= ~CR4_SMAP_MASK;
}
- env->hflags &= ~HF_SMAP_MASK;
if (new_cr4 & CR4_SMAP_MASK) {
- env->hflags |= HF_SMAP_MASK;
+ hflags |= HF_SMAP_MASK;
}
env->cr[4] = new_cr4;
+ env->hflags = hflags;
}
#if defined(CONFIG_USER_ONLY)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 6109e466fb..9dfc735429 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -187,6 +187,10 @@ DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
+DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 94024bc1b1..fca0314c3a 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1855,13 +1855,16 @@ static int kvm_get_sregs(X86CPU *cpu)
HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
- hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+ hflags = env->hflags & HFLAG_COPY_MASK;
+ hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
- hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
- (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ hflags |= HF_OSFXSR_MASK;
+ }
if (env->efer & MSR_EFER_LMA) {
hflags |= HF_LMA_MASK;
@@ -1882,7 +1885,7 @@ static int kvm_get_sregs(X86CPU *cpu)
env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
}
}
- env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+ env->hflags = hflags;
return 0;
}
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 647757843f..7571e850d5 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7079,6 +7079,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
break;
+ case 0xd0: /* xgetbv */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+ gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+ break;
+
+ case 0xd1: /* xsetbv */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ }
+ tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+ cpu_regs[R_EDX]);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+ gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+ /* End TB because translation flags may change. */
+ gen_jmp_im(s->pc - pc_start);
+ gen_eob(s);
+ break;
+
case 0xd8: /* VMRUN */
if (!(s->flags & HF_SVME_MASK) || !s->pe) {
goto illegal_op;
@@ -7580,6 +7610,30 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
break;
+ CASE_MEM_OP(4): /* xsave */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ gen_lea_modrm(env, s, modrm);
+ tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+ cpu_regs[R_EDX]);
+ gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+ break;
+
+ CASE_MEM_OP(5): /* xrstor */
+ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+ || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+ | PREFIX_REPZ | PREFIX_REPNZ))) {
+ goto illegal_op;
+ }
+ gen_lea_modrm(env, s, modrm);
+ tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+ cpu_regs[R_EDX]);
+ gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+ break;
+
CASE_MEM_OP(6): /* clwb */
if (prefixes & PREFIX_LOCK) {
goto illegal_op;