aboutsummaryrefslogtreecommitdiff
path: root/target
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2022-03-15 14:41:16 +0000
committerPeter Maydell <peter.maydell@linaro.org>2022-03-15 14:41:16 +0000
commitdee3a86d54f7d200e715843ee92aba2aaeb8382f (patch)
treeb87d8b1c2d8ac5eda62de9d3d93b16a8502a8e65 /target
parent1af26ce67082727aeb109708f2c75d5fc5364e23 (diff)
parent8a5606f6449c90a0731790ed03eebedcf7045070 (diff)
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* whpx fixes in preparation for GDB support (Ivan) * VSS header fixes (Marc-André) * 5-level EPT support (Vitaly) * AMX support (Jing Liu & Yang Zhong) * Bundle changes to MSI routes (Longpeng) * More precise emulation of #SS (Gareth) * Disable ASAN testing # gpg: Signature made Tue 15 Mar 2022 10:51:00 GMT # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@redhat.com" # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full] # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (22 commits) gitlab-ci: do not run tests with address sanitizer KVM: SVM: always set MSR_AMD64_TSC_RATIO to default value i386: Add Icelake-Server-v6 CPU model with 5-level EPT support x86: Support XFD and AMX xsave data migration x86: add support for KVM_CAP_XSAVE2 and AMX state migration x86: Add AMX CPUIDs enumeration x86: Add XFD faulting bit for state components x86: Grant AMX permission for guest x86: Add AMX XTILECFG and XTILEDATA components x86: Fix the 64-byte boundary enumeration for extended state linux-headers: include missing changes from 5.17 target/i386: Throw a #SS when loading a non-canonical IST target/i386: only include bits in pg_mode if they are not ignored kvm/msi: do explicit commit when adding msi routes kvm-irqchip: introduce new API to support route change update meson-buildoptions.sh qga/vss: update informative message about MinGW qga/vss-win32: check old VSS SDK headers meson: fix generic location of vss headers vmxcap: Add 5-level EPT bit ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r--target/i386/cpu.c84
-rw-r--r--target/i386/cpu.h43
-rw-r--r--target/i386/kvm/kvm-cpu.c11
-rw-r--r--target/i386/kvm/kvm.c125
-rw-r--r--target/i386/kvm/kvm_i386.h1
-rw-r--r--target/i386/machine.c46
-rw-r--r--target/i386/tcg/seg_helper.c52
-rw-r--r--target/i386/tcg/sysemu/excp_helper.c40
-rw-r--r--target/i386/whpx/whpx-all.c30
-rw-r--r--target/i386/xsave_helper.c28
10 files changed, 390 insertions, 70 deletions
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 6c7ef1099b..a88d6554c8 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -575,6 +575,18 @@ static CPUCacheInfo legacy_l3_cache = {
#define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */
#define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
+/* CPUID Leaf 0x1D constants: */
+#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
+#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
+#define INTEL_AMX_BYTES_PER_TILE 0x400
+#define INTEL_AMX_BYTES_PER_ROW 0x40
+#define INTEL_AMX_TILE_MAX_NAMES 0x8
+#define INTEL_AMX_TILE_MAX_ROWS 0x10
+
+/* CPUID Leaf 0x1E constants: */
+#define INTEL_AMX_TMUL_MAX_K 0x10
+#define INTEL_AMX_TMUL_MAX_N 0x40
+
void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
uint32_t vendor2, uint32_t vendor3)
{
@@ -844,8 +856,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"avx512-vp2intersect", NULL, "md-clear", NULL,
NULL, NULL, "serialize", NULL,
"tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
- NULL, NULL, NULL, "avx512-fp16",
- NULL, NULL, "spec-ctrl", "stibp",
+ NULL, NULL, "amx-bf16", "avx512-fp16",
+ "amx-tile", "amx-int8", "spec-ctrl", "stibp",
NULL, "arch-capabilities", "core-capability", "ssbd",
},
.cpuid = {
@@ -910,7 +922,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
"xsaveopt", "xsavec", "xgetbv1", "xsaves",
- NULL, NULL, NULL, NULL,
+ "xfd", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
@@ -1402,6 +1414,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_PKRU_BIT] =
{ .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
.size = sizeof(XSavePKRU) },
+ [XSTATE_XTILE_CFG_BIT] = {
+ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+ .size = sizeof(XSaveXTILECFG),
+ },
+ [XSTATE_XTILE_DATA_BIT] = {
+ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+ .size = sizeof(XSaveXTILEDATA)
+ },
};
static uint32_t xsave_area_size(uint64_t mask)
@@ -3506,6 +3526,14 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
+ {
+ .version = 6,
+ .note = "5-level EPT",
+ .props = (PropValue[]) {
+ { "vmx-page-walk-5", "on" },
+ { /* end of list */ }
+ },
+ },
{ /* end of list */ }
}
},
@@ -5488,6 +5516,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
const ExtSaveArea *esa = &x86_ext_save_areas[count];
*eax = esa->size;
*ebx = esa->offset;
+ *ecx = esa->ecx &
+ (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
}
}
break;
@@ -5576,6 +5606,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
break;
}
+ case 0x1D: {
+ /* AMX TILE */
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+ break;
+ }
+
+ if (count == 0) {
+ /* Highest numbered palette subleaf */
+ *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
+ } else if (count == 1) {
+ *eax = INTEL_AMX_TOTAL_TILE_BYTES |
+ (INTEL_AMX_BYTES_PER_TILE << 16);
+ *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
+ *ecx = INTEL_AMX_TILE_MAX_ROWS;
+ }
+ break;
+ }
+ case 0x1E: {
+ /* AMX TMUL */
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+ break;
+ }
+
+ if (count == 0) {
+ /* Highest numbered palette subleaf */
+ *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
+ }
+ break;
+ }
case 0x40000000:
/*
* CPUID code in kvm_arch_init_vcpu() ignores stuff
@@ -5930,9 +5997,7 @@ static void x86_cpu_reset(DeviceState *dev)
x86_cpu_set_sgxlepubkeyhash(env);
- if (env->features[FEAT_SVM] & CPUID_SVM_TSCSCALE) {
- env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT;
- }
+ env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT;
#endif
}
@@ -5998,6 +6063,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
CPUX86State *env = &cpu->env;
int i;
uint64_t mask;
+ static bool request_perm;
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
env->features[FEAT_XSAVE_COMP_LO] = 0;
@@ -6013,6 +6079,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
}
}
+ /* Only request permission for first vcpu */
+ if (kvm_enabled() && !request_perm) {
+ kvm_request_xsave_components(cpu, mask);
+ request_perm = true;
+ }
+
env->features[FEAT_XSAVE_COMP_LO] = mask;
env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
}
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e11734ba86..5e406088a9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -507,6 +507,9 @@ typedef enum X86Seg {
#define MSR_VM_HSAVE_PA 0xc0010117
+#define MSR_IA32_XFD 0x000001c4
+#define MSR_IA32_XFD_ERR 0x000001c5
+
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_UMWAIT_CONTROL 0xe1
@@ -539,6 +542,8 @@ typedef enum X86Seg {
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
#define XSTATE_PKRU_BIT 9
+#define XSTATE_XTILE_CFG_BIT 17
+#define XSTATE_XTILE_DATA_BIT 18
#define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT)
#define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT)
@@ -549,6 +554,17 @@ typedef enum X86Seg {
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
+
+#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK)
+
+#define ESA_FEATURE_ALIGN64_BIT 1
+#define ESA_FEATURE_XFD_BIT 2
+
+#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT)
+#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT)
+
/* CPUID feature words */
typedef enum FeatureWord {
@@ -842,6 +858,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
/* AVX512_FP16 instruction */
#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
+/* AMX tile (two-dimensional register) */
+#define CPUID_7_0_EDX_AMX_TILE (1U << 24)
/* Speculation Control */
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
/* Single Thread Indirect Branch Predictors */
@@ -857,6 +875,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
+/* XFD Extend Feature Disabled */
+#define CPUID_D_1_EAX_XFD (1U << 4)
/* Packets which contain IP payload have LIP values */
#define CPUID_14_0_ECX_LIP (1U << 31)
@@ -1345,6 +1365,16 @@ typedef struct XSavePKRU {
uint32_t padding;
} XSavePKRU;
+/* Ext. save area 17: AMX XTILECFG state */
+typedef struct XSaveXTILECFG {
+ uint8_t xtilecfg[64];
+} XSaveXTILECFG;
+
+/* Ext. save area 18: AMX XTILEDATA state */
+typedef struct XSaveXTILEDATA {
+ uint8_t xtiledata[8][1024];
+} XSaveXTILEDATA;
+
QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
@@ -1352,13 +1382,16 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
typedef struct ExtSaveArea {
uint32_t feature, bits;
uint32_t offset, size;
+ uint32_t ecx;
} ExtSaveArea;
-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
@@ -1499,6 +1532,10 @@ typedef struct CPUArchState {
uint64_t opmask_regs[NB_OPMASK_REGS];
YMMReg zmmh_regs[CPU_NB_REGS];
ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+ uint8_t xtilecfg[64];
+ uint8_t xtiledata[8192];
+#endif
/* sysenter registers */
uint32_t sysenter_cs;
@@ -1584,6 +1621,10 @@ typedef struct CPUArchState {
uint64_t msr_rtit_cr3_match;
uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
+ /* Per-VCPU XFD MSRs */
+ uint64_t msr_xfd;
+ uint64_t msr_xfd_err;
+
/* exception/interrupt handling */
int error_code;
int exception_is_int;
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d95028018e..a35a1bf9fe 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
static void kvm_cpu_xsave_init(void)
{
static bool first = true;
- KVMState *s = kvm_state;
+ uint32_t eax, ebx, ecx, edx;
int i;
if (!first) {
@@ -100,10 +100,11 @@ static void kvm_cpu_xsave_init(void)
ExtSaveArea *esa = &x86_ext_save_areas[i];
if (esa->size) {
- int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
- if (sz != 0) {
- assert(esa->size == sz);
- esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
+ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+ if (eax != 0) {
+ assert(esa->size == eax);
+ esa->offset = ebx;
+ esa->ecx = ecx;
}
}
}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 83d0988302..ef2c68a6f4 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -17,6 +17,7 @@
#include "qapi/error.h"
#include <sys/ioctl.h>
#include <sys/utsname.h>
+#include <sys/syscall.h>
#include <linux/kvm.h>
#include "standard-headers/asm-x86/kvm_para.h"
@@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters;
static uint32_t num_architectural_pmu_fixed_counters;
static int has_xsave;
+static int has_xsave2;
static int has_xcrs;
static int has_pit_state2;
static int has_sregs2;
@@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
struct kvm_cpuid2 *cpuid;
uint32_t ret = 0;
uint32_t cpuid_1_edx;
+ uint64_t bitmask;
cpuid = get_supported_cpuid(s);
@@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
if (!has_msr_arch_capabs) {
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
}
+ } else if (function == 0xd && index == 0 &&
+ (reg == R_EAX || reg == R_EDX)) {
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask
+ };
+
+ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+ if (!sys_attr) {
+ warn_report("cannot get sys attribute capabilities %d", sys_attr);
+ }
+
+ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+ "error: %d", rc);
+ }
+ ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
} else if (function == 0x80000001 && reg == R_ECX) {
/*
* It's safe to enable TOPOEXT even if it's not returned by
@@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker;
#define KVM_MAX_CPUID_ENTRIES 100
+static void kvm_init_xsave(CPUX86State *env)
+{
+ if (has_xsave2) {
+ env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
+ } else if (has_xsave) {
+ env->xsave_buf_len = sizeof(struct kvm_xsave);
+ } else {
+ return;
+ }
+
+ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+ memset(env->xsave_buf, 0, env->xsave_buf_len);
+ /*
+ * The allocated storage must be large enough for all of the
+ * possible XSAVE state components.
+ */
+ assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
+ env->xsave_buf_len);
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpuid_i = 0;
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
r = kvm_arch_set_tsc_khz(cs);
if (r < 0) {
return r;
@@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
c = &cpuid_data.entries[cpuid_i++];
}
break;
- case 0x14: {
+ case 0x14:
+ case 0x1d:
+ case 0x1e: {
uint32_t times;
c->function = i;
@@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (r) {
goto fail;
}
-
- if (has_xsave) {
- env->xsave_buf_len = sizeof(struct kvm_xsave);
- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
- memset(env->xsave_buf, 0, env->xsave_buf_len);
-
- /*
- * The allocated storage must be large enough for all of the
- * possible XSAVE state components.
- */
- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
- <= env->xsave_buf_len);
- }
+ kvm_init_xsave(env);
max_nested_state_len = kvm_max_nested_state_length();
if (max_nested_state_len > 0) {
@@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
env->msr_ia32_sgxlepubkeyhash[3]);
}
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD,
+ env->msr_xfd);
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
+ env->msr_xfd_err);
+ }
+
/* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
* kvm_put_msr_feature_control. */
}
@@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
void *xsave = env->xsave_buf;
- int ret;
+ int type, ret;
if (!has_xsave) {
return kvm_get_fpu(cpu);
}
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
+ type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
+ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
if (ret < 0) {
return ret;
}
@@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
}
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
+ }
+
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
if (ret < 0) {
return ret;
@@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu)
env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
msrs[i].data;
break;
+ case MSR_IA32_XFD:
+ env->msr_xfd = msrs[i].data;
+ break;
+ case MSR_IA32_XFD_ERR:
+ env->msr_xfd_err = msrs[i].data;
+ break;
}
}
@@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s)
kvm_gsi_routing_allowed = true;
if (kvm_irqchip_is_split()) {
+ KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
int i;
/* If the ioapic is in QEMU and the lapics are in KVM, reserve
MSI routes for signaling interrupts to the local apics. */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) {
+ if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) {
error_report("Could not enable split IRQ mode.");
exit(1);
}
}
+ kvm_irqchip_commit_route_changes(&c);
}
}
@@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
{
return !sev_es_enabled();
}
+
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+ KVMState *s = kvm_state;
+ uint64_t supported;
+
+ mask &= XSTATE_DYNAMIC_MASK;
+ if (!mask) {
+ return;
+ }
+ /*
+ * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
+ * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
+ * about them already because they are not supported features.
+ */
+ supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+ supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
+ mask &= supported;
+
+ while (mask) {
+ int bit = ctz64(mask);
+ int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+ if (rc) {
+ /*
+ * Older kernel version (<5.17) do not support
+ * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
+ * any dynamic feature from kvm_arch_get_supported_cpuid.
+ */
+ warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+ "for feature bit %d", bit);
+ }
+ mask &= ~BIT_ULL(bit);
+ }
+}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index a978509d50..4124912c20 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
bool kvm_enable_sgx_provisioning(KVMState *s);
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#endif
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 6202f47793..7c54bada81 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1483,6 +1483,48 @@ static const VMStateDescription vmstate_pdptrs = {
}
};
+static bool xfd_msrs_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
+}
+
+static const VMStateDescription vmstate_msr_xfd = {
+ .name = "cpu/msr_xfd",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = xfd_msrs_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.msr_xfd, X86CPU),
+ VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#ifdef TARGET_X86_64
+static bool amx_xtile_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
+}
+
+static const VMStateDescription vmstate_amx_xtile = {
+ .name = "cpu/intel_amx_xtile",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = amx_xtile_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
+ VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
+ VMSTATE_END_OF_LIST()
+ }
+};
+#endif
+
const VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -1622,6 +1664,10 @@ const VMStateDescription vmstate_x86_cpu = {
&vmstate_msr_tsx_ctrl,
&vmstate_msr_intel_sgx,
&vmstate_pdptrs,
+ &vmstate_msr_xfd,
+#ifdef TARGET_X86_64
+ &vmstate_amx_xtile,
+#endif
NULL
}
};
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index baa905a0cd..bffd82923f 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -28,6 +28,42 @@
#include "helper-tcg.h"
#include "seg_helper.h"
+int get_pg_mode(CPUX86State *env)
+{
+ int pg_mode = 0;
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ return 0;
+ }
+ if (env->cr[0] & CR0_WP_MASK) {
+ pg_mode |= PG_MODE_WP;
+ }
+ if (env->cr[4] & CR4_PAE_MASK) {
+ pg_mode |= PG_MODE_PAE;
+ if (env->efer & MSR_EFER_NXE) {
+ pg_mode |= PG_MODE_NXE;
+ }
+ }
+ if (env->cr[4] & CR4_PSE_MASK) {
+ pg_mode |= PG_MODE_PSE;
+ }
+ if (env->cr[4] & CR4_SMEP_MASK) {
+ pg_mode |= PG_MODE_SMEP;
+ }
+ if (env->hflags & HF_LMA_MASK) {
+ pg_mode |= PG_MODE_LMA;
+ if (env->cr[4] & CR4_PKE_MASK) {
+ pg_mode |= PG_MODE_PKE;
+ }
+ if (env->cr[4] & CR4_PKS_MASK) {
+ pg_mode |= PG_MODE_PKS;
+ }
+ if (env->cr[4] & CR4_LA57_MASK) {
+ pg_mode |= PG_MODE_LA57;
+ }
+ }
+ return pg_mode;
+}
+
/* return non zero if error */
static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr,
uint32_t *e2_ptr, int selector,
@@ -794,7 +830,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level)
{
X86CPU *cpu = env_archcpu(env);
- int index;
+ int index, pg_mode;
+ target_ulong rsp;
+ int32_t sext;
#if 0
printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
@@ -808,7 +846,17 @@ static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level)
if ((index + 7) > env->tr.limit) {
raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc);
}
- return cpu_ldq_kernel(env, env->tr.base + index);
+
+ rsp = cpu_ldq_kernel(env, env->tr.base + index);
+
+ /* test virtual address sign extension */
+ pg_mode = get_pg_mode(env);
+ sext = (int64_t)rsp >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
+ if (sext != 0 && sext != -1) {
+ raise_exception_err(env, EXCP0C_STACK, 0);
+ }
+
+ return rsp;
}
/* 64 bit interrupt */
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 5627772e7c..e1b6d88683 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -22,39 +22,6 @@
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
-int get_pg_mode(CPUX86State *env)
-{
- int pg_mode = 0;
- if (env->cr[0] & CR0_WP_MASK) {
- pg_mode |= PG_MODE_WP;
- }
- if (env->cr[4] & CR4_PAE_MASK) {
- pg_mode |= PG_MODE_PAE;
- }
- if (env->cr[4] & CR4_PSE_MASK) {
- pg_mode |= PG_MODE_PSE;
- }
- if (env->cr[4] & CR4_PKE_MASK) {
- pg_mode |= PG_MODE_PKE;
- }
- if (env->cr[4] & CR4_PKS_MASK) {
- pg_mode |= PG_MODE_PKS;
- }
- if (env->cr[4] & CR4_SMEP_MASK) {
- pg_mode |= PG_MODE_SMEP;
- }
- if (env->cr[4] & CR4_LA57_MASK) {
- pg_mode |= PG_MODE_LA57;
- }
- if (env->hflags & HF_LMA_MASK) {
- pg_mode |= PG_MODE_LMA;
- }
- if (env->efer & MSR_EFER_NXE) {
- pg_mode |= PG_MODE_NXE;
- }
- return pg_mode;
-}
-
#define PG_ERROR_OK (-1)
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
@@ -279,9 +246,7 @@ do_check_protect_pse36:
*prot |= PAGE_EXEC;
}
- if (!(pg_mode & PG_MODE_LMA)) {
- pkr = 0;
- } else if (ptep & PG_USER_MASK) {
+ if (ptep & PG_USER_MASK) {
pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
} else {
pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
@@ -344,8 +309,7 @@ do_check_protect_pse36:
if (is_user)
error_code |= PG_ERROR_U_MASK;
if (is_write1 == 2 &&
- (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
- (pg_mode & PG_MODE_SMEP)))
+ ((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP)))
error_code |= PG_ERROR_I_D_MASK;
return error_code;
}
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index c7e25abf42..ecddf0cb91 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -256,6 +256,21 @@ static int whpx_set_tsc(CPUState *cpu)
return 0;
}
+/*
+ * The CR8 register in the CPU is mapped to the TPR register of the APIC,
+ * however, they use a slightly different encoding. Specifically:
+ *
+ * APIC.TPR[bits 7:4] = CR8[bits 3:0]
+ *
+ * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
+ * and IA-32 Architectures Software Developer's Manual.
+ */
+
+static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
+{
+ return tpr >> 4;
+}
+
static void whpx_set_registers(CPUState *cpu, int level)
{
struct whpx_state *whpx = &whpx_global;
@@ -284,7 +299,7 @@ static void whpx_set_registers(CPUState *cpu, int level)
v86 = (env->eflags & VM_MASK);
r86 = !(env->cr[0] & CR0_PE_MASK);
- vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
+ vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
idx = 0;
@@ -475,6 +490,17 @@ static void whpx_get_registers(CPUState *cpu)
hr);
}
+ if (whpx_apic_in_platform()) {
+ /*
+ * Fetch the TPR value from the emulated APIC. It may get overwritten
+ * below with the value from CR8 returned by
+ * WHvGetVirtualProcessorRegisters().
+ */
+ whpx_apic_get(x86_cpu->apic_state);
+ vcpu->tpr = whpx_apic_tpr_to_cr8(
+ cpu_get_apic_tpr(x86_cpu->apic_state));
+ }
+
idx = 0;
/* Indexes for first 16 registers match between HV and QEMU definitions */
@@ -604,6 +630,8 @@ static void whpx_get_registers(CPUState *cpu)
whpx_apic_get(x86_cpu->apic_state);
}
+ x86_update_hflags(env);
+
return;
}
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
index ac61a96344..996e9f3bfe 100644
--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
memcpy(pkru, &env->pkru, sizeof(env->pkru));
}
+
+ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+ if (e->size && e->offset) {
+ XSaveXTILECFG *tilecfg = buf + e->offset;
+
+ memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
+ }
+
+ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+ if (e->size && e->offset && buflen >= e->size + e->offset) {
+ XSaveXTILEDATA *tiledata = buf + e->offset;
+
+ memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
+ }
#endif
}
@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
pkru = buf + e->offset;
memcpy(&env->pkru, pkru, sizeof(env->pkru));
}
+
+ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+ if (e->size && e->offset) {
+ const XSaveXTILECFG *tilecfg = buf + e->offset;
+
+ memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
+ }
+
+ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+ if (e->size && e->offset && buflen >= e->size + e->offset) {
+ const XSaveXTILEDATA *tiledata = buf + e->offset;
+
+ memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
+ }
#endif
}