diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2022-03-15 14:41:16 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-03-15 14:41:16 +0000 |
commit | dee3a86d54f7d200e715843ee92aba2aaeb8382f (patch) | |
tree | b87d8b1c2d8ac5eda62de9d3d93b16a8502a8e65 /target | |
parent | 1af26ce67082727aeb109708f2c75d5fc5364e23 (diff) | |
parent | 8a5606f6449c90a0731790ed03eebedcf7045070 (diff) |
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* whpx fixes in preparation for GDB support (Ivan)
* VSS header fixes (Marc-André)
* 5-level EPT support (Vitaly)
* AMX support (Jing Liu & Yang Zhong)
* Bundle changes to MSI routes (Longpeng)
* More precise emulation of #SS (Gareth)
* Disable ASAN testing
# gpg: Signature made Tue 15 Mar 2022 10:51:00 GMT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (22 commits)
gitlab-ci: do not run tests with address sanitizer
KVM: SVM: always set MSR_AMD64_TSC_RATIO to default value
i386: Add Icelake-Server-v6 CPU model with 5-level EPT support
x86: Support XFD and AMX xsave data migration
x86: add support for KVM_CAP_XSAVE2 and AMX state migration
x86: Add AMX CPUIDs enumeration
x86: Add XFD faulting bit for state components
x86: Grant AMX permission for guest
x86: Add AMX XTILECFG and XTILEDATA components
x86: Fix the 64-byte boundary enumeration for extended state
linux-headers: include missing changes from 5.17
target/i386: Throw a #SS when loading a non-canonical IST
target/i386: only include bits in pg_mode if they are not ignored
kvm/msi: do explicit commit when adding msi routes
kvm-irqchip: introduce new API to support route change
update meson-buildoptions.sh
qga/vss: update informative message about MinGW
qga/vss-win32: check old VSS SDK headers
meson: fix generic location of vss headers
vmxcap: Add 5-level EPT bit
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/i386/cpu.c | 84 | ||||
-rw-r--r-- | target/i386/cpu.h | 43 | ||||
-rw-r--r-- | target/i386/kvm/kvm-cpu.c | 11 | ||||
-rw-r--r-- | target/i386/kvm/kvm.c | 125 | ||||
-rw-r--r-- | target/i386/kvm/kvm_i386.h | 1 | ||||
-rw-r--r-- | target/i386/machine.c | 46 | ||||
-rw-r--r-- | target/i386/tcg/seg_helper.c | 52 | ||||
-rw-r--r-- | target/i386/tcg/sysemu/excp_helper.c | 40 | ||||
-rw-r--r-- | target/i386/whpx/whpx-all.c | 30 | ||||
-rw-r--r-- | target/i386/xsave_helper.c | 28 |
10 files changed, 390 insertions, 70 deletions
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6c7ef1099b..a88d6554c8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -575,6 +575,18 @@ static CPUCacheInfo legacy_l3_cache = { #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ +/* CPUID Leaf 0x1D constants: */ +#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 +#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 +#define INTEL_AMX_BYTES_PER_TILE 0x400 +#define INTEL_AMX_BYTES_PER_ROW 0x40 +#define INTEL_AMX_TILE_MAX_NAMES 0x8 +#define INTEL_AMX_TILE_MAX_ROWS 0x10 + +/* CPUID Leaf 0x1E constants: */ +#define INTEL_AMX_TMUL_MAX_K 0x10 +#define INTEL_AMX_TMUL_MAX_N 0x40 + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, uint32_t vendor2, uint32_t vendor3) { @@ -844,8 +856,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512-vp2intersect", NULL, "md-clear", NULL, NULL, NULL, "serialize", NULL, "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, - NULL, NULL, NULL, "avx512-fp16", - NULL, NULL, "spec-ctrl", "stibp", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", }, .cpuid = { @@ -910,7 +922,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { "xsaveopt", "xsavec", "xgetbv1", "xsaves", - NULL, NULL, NULL, NULL, + "xfd", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1402,6 +1414,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_PKRU_BIT] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, .size = sizeof(XSavePKRU) }, + [XSTATE_XTILE_CFG_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILECFG), + }, + [XSTATE_XTILE_DATA_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILEDATA) + }, }; static uint32_t xsave_area_size(uint64_t mask) @@ -3506,6 +3526,14 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 6, + .note = "5-level EPT", + .props = (PropValue[]) { + { "vmx-page-walk-5", "on" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, @@ -5488,6 +5516,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, const ExtSaveArea *esa = &x86_ext_save_areas[count]; *eax = esa->size; *ebx = esa->offset; + *ecx = esa->ecx & + (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); } } break; @@ -5576,6 +5606,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1D: { + /* AMX TILE */ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *eax = INTEL_AMX_TILE_MAX_SUBLEAF; + } else if (count == 1) { + *eax = INTEL_AMX_TOTAL_TILE_BYTES | + (INTEL_AMX_BYTES_PER_TILE << 16); + *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); + *ecx = INTEL_AMX_TILE_MAX_ROWS; + } + break; + } + case 0x1E: { + /* AMX TMUL */ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -5930,9 +5997,7 @@ static void x86_cpu_reset(DeviceState *dev) x86_cpu_set_sgxlepubkeyhash(env); - if (env->features[FEAT_SVM] & CPUID_SVM_TSCSCALE) { - env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT; - } + env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT; #endif } @@ -5998,6 +6063,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) CPUX86State *env = &cpu->env; int i; uint64_t mask; + static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { env->features[FEAT_XSAVE_COMP_LO] = 0; @@ -6013,6 +6079,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) } } + /* Only request permission for first vcpu */ + if (kvm_enabled() && !request_perm) { + kvm_request_xsave_components(cpu, mask); + request_perm = true; + } + env->features[FEAT_XSAVE_COMP_LO] = mask; env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e11734ba86..5e406088a9 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -507,6 +507,9 @@ typedef enum X86Seg { #define MSR_VM_HSAVE_PA 0xc0010117 +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 + #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_UMWAIT_CONTROL 0xe1 @@ -539,6 +542,8 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 #define XSTATE_PKRU_BIT 9 +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) @@ -549,6 +554,17 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) + +#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + +#define ESA_FEATURE_ALIGN64_BIT 1 +#define ESA_FEATURE_XFD_BIT 2 + +#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) +#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + /* CPUID feature words */ typedef enum FeatureWord { @@ -842,6 +858,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) /* AVX512_FP16 instruction */ #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) +/* AMX tile (two-dimensional register) */ +#define CPUID_7_0_EDX_AMX_TILE (1U << 24) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -857,6 +875,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* XFD Extend Feature Disabled */ +#define CPUID_D_1_EAX_XFD (1U << 4) /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) @@ -1345,6 +1365,16 @@ typedef struct XSavePKRU { uint32_t padding; } XSavePKRU; +/* Ext. save area 17: AMX XTILECFG state */ +typedef struct XSaveXTILECFG { + uint8_t xtilecfg[64]; +} XSaveXTILECFG; + +/* Ext. save area 18: AMX XTILEDATA state */ +typedef struct XSaveXTILEDATA { + uint8_t xtiledata[8][1024]; +} XSaveXTILEDATA; + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); @@ -1352,13 +1382,16 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); typedef struct ExtSaveArea { uint32_t feature, bits; uint32_t offset, size; + uint32_t ecx; } ExtSaveArea; -#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) +#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; @@ -1499,6 +1532,10 @@ typedef struct CPUArchState { uint64_t opmask_regs[NB_OPMASK_REGS]; YMMReg zmmh_regs[CPU_NB_REGS]; ZMMReg hi16_zmm_regs[CPU_NB_REGS]; +#ifdef TARGET_X86_64 + uint8_t xtilecfg[64]; + uint8_t xtiledata[8192]; +#endif /* sysenter registers */ uint32_t sysenter_cs; @@ -1584,6 +1621,10 @@ typedef struct CPUArchState { uint64_t msr_rtit_cr3_match; uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + /* Per-VCPU XFD MSRs */ + uint64_t msr_xfd; + uint64_t msr_xfd_err; + /* exception/interrupt handling */ int error_code; int exception_is_int; diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index d95028018e..a35a1bf9fe 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) static void kvm_cpu_xsave_init(void) { static bool first = true; - KVMState *s = kvm_state; + uint32_t eax, ebx, ecx, edx; int i; if (!first) { @@ -100,10 +100,11 @@ static void kvm_cpu_xsave_init(void) ExtSaveArea *esa = &x86_ext_save_areas[i]; if (esa->size) { - int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX); - if (sz != 0) { - assert(esa->size == sz); - esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); + host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); + if (eax != 0) { + assert(esa->size == eax); + esa->offset = ebx; + esa->ecx = ecx; } } } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 83d0988302..ef2c68a6f4 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -17,6 +17,7 @@ #include "qapi/error.h" #include <sys/ioctl.h> #include <sys/utsname.h> +#include <sys/syscall.h> #include <linux/kvm.h> #include "standard-headers/asm-x86/kvm_para.h" @@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters; static uint32_t num_architectural_pmu_fixed_counters; static int has_xsave; +static int has_xsave2; static int has_xcrs; static int has_pit_state2; static int has_sregs2; @@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, struct kvm_cpuid2 *cpuid; uint32_t ret = 0; uint32_t cpuid_1_edx; + uint64_t bitmask; cpuid = get_supported_cpuid(s); @@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!has_msr_arch_capabs) { ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { + warn_report("cannot get sys attribute capabilities %d", sys_attr); + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { + warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " + "error: %d", rc); + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; } else if (function == 0x80000001 && reg == R_ECX) { /* * It's safe to enable TOPOEXT even if it's not returned by @@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker; #define KVM_MAX_CPUID_ENTRIES 100 +static void kvm_init_xsave(CPUX86State *env) +{ + if (has_xsave2) { + env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); + } else if (has_xsave) { + env->xsave_buf_len = sizeof(struct kvm_xsave); + } else { + return; + } + + env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); + memset(env->xsave_buf, 0, env->xsave_buf_len); + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state components. + */ + assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= + env->xsave_buf_len); +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = 0; + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + r = kvm_arch_set_tsc_khz(cs); if (r < 0) { return r; @@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; } break; - case 0x14: { + case 0x14: + case 0x1d: + case 0x1e: { uint32_t times; c->function = i; @@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (r) { goto fail; } - - if (has_xsave) { - env->xsave_buf_len = sizeof(struct kvm_xsave); - env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); - memset(env->xsave_buf, 0, env->xsave_buf_len); - - /* - * The allocated storage must be large enough for all of the - * possible XSAVE state components. - */ - assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) - <= env->xsave_buf_len); - } + kvm_init_xsave(env); max_nested_state_len = kvm_max_nested_state_length(); if (max_nested_state_len > 0) { @@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) env->msr_ia32_sgxlepubkeyhash[3]); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, + env->msr_xfd); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, + env->msr_xfd_err); + } + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ } @@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu) { CPUX86State *env = &cpu->env; void *xsave = env->xsave_buf; - int ret; + int type, ret; if (!has_xsave) { return kvm_get_fpu(cpu); } - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); + type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE; + ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); if (ret < 0) { return ret; } @@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); + } + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); if (ret < 0) { return ret; @@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu) env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = msrs[i].data; break; + case MSR_IA32_XFD: + env->msr_xfd = msrs[i].data; + break; + case MSR_IA32_XFD_ERR: + env->msr_xfd_err = msrs[i].data; + break; } } @@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s) kvm_gsi_routing_allowed = true; if (kvm_irqchip_is_split()) { + KVMRouteChange c = kvm_irqchip_begin_route_changes(s); int i; /* If the ioapic is in QEMU and the lapics are in KVM, reserve MSI routes for signaling interrupts to the local apics. */ for (i = 0; i < IOAPIC_NUM_PINS; i++) { - if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { + if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) { error_report("Could not enable split IRQ mode."); exit(1); } } + kvm_irqchip_commit_route_changes(&c); } } @@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void) { return !sev_es_enabled(); } + +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +{ + KVMState *s = kvm_state; + uint64_t supported; + + mask &= XSTATE_DYNAMIC_MASK; + if (!mask) { + return; + } + /* + * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. + * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned + * about them already because they are not supported features. + */ + supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); + supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; + mask &= supported; + + while (mask) { + int bit = ctz64(mask); + int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + if (rc) { + /* + * Older kernel version (<5.17) do not support + * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return + * any dynamic feature from kvm_arch_get_supported_cpuid. + */ + warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " + "for feature bit %d", bit); + } + mask &= ~BIT_ULL(bit); + } +} diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index a978509d50..4124912c20 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); bool kvm_enable_sgx_provisioning(KVMState *s); +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #endif diff --git a/target/i386/machine.c b/target/i386/machine.c index 6202f47793..7c54bada81 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1483,6 +1483,48 @@ static const VMStateDescription vmstate_pdptrs = { } }; +static bool xfd_msrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); +} + +static const VMStateDescription vmstate_msr_xfd = { + .name = "cpu/msr_xfd", + .version_id = 1, + .minimum_version_id = 1, + .needed = xfd_msrs_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.msr_xfd, X86CPU), + VMSTATE_UINT64(env.msr_xfd_err, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +#ifdef TARGET_X86_64 +static bool amx_xtile_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); +} + +static const VMStateDescription vmstate_amx_xtile = { + .name = "cpu/intel_amx_xtile", + .version_id = 1, + .minimum_version_id = 1, + .needed = amx_xtile_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), + VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), + VMSTATE_END_OF_LIST() + } +}; +#endif + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1622,6 +1664,10 @@ const VMStateDescription vmstate_x86_cpu = { &vmstate_msr_tsx_ctrl, &vmstate_msr_intel_sgx, &vmstate_pdptrs, + &vmstate_msr_xfd, +#ifdef TARGET_X86_64 + &vmstate_amx_xtile, +#endif NULL } }; diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index baa905a0cd..bffd82923f 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -28,6 +28,42 @@ #include "helper-tcg.h" #include "seg_helper.h" +int get_pg_mode(CPUX86State *env) +{ + int pg_mode = 0; + if (!(env->cr[0] & CR0_PG_MASK)) { + return 0; + } + if (env->cr[0] & CR0_WP_MASK) { + pg_mode |= PG_MODE_WP; + } + if (env->cr[4] & CR4_PAE_MASK) { + pg_mode |= PG_MODE_PAE; + if (env->efer & MSR_EFER_NXE) { + pg_mode |= PG_MODE_NXE; + } + } + if (env->cr[4] & CR4_PSE_MASK) { + pg_mode |= PG_MODE_PSE; + } + if (env->cr[4] & CR4_SMEP_MASK) { + pg_mode |= PG_MODE_SMEP; + } + if (env->hflags & HF_LMA_MASK) { + pg_mode |= PG_MODE_LMA; + if (env->cr[4] & CR4_PKE_MASK) { + pg_mode |= PG_MODE_PKE; + } + if (env->cr[4] & CR4_PKS_MASK) { + pg_mode |= PG_MODE_PKS; + } + if (env->cr[4] & CR4_LA57_MASK) { + pg_mode |= PG_MODE_LA57; + } + } + return pg_mode; +} + /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, uint32_t *e2_ptr, int selector, @@ -794,7 +830,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level) { X86CPU *cpu = env_archcpu(env); - int index; + int index, pg_mode; + target_ulong rsp; + int32_t sext; #if 0 printf("TR: base=" TARGET_FMT_lx " limit=%x\n", @@ -808,7 +846,17 @@ static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level) if ((index + 7) > env->tr.limit) { raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc); } - return cpu_ldq_kernel(env, env->tr.base + index); + + rsp = cpu_ldq_kernel(env, env->tr.base + index); + + /* test virtual address sign extension */ + pg_mode = get_pg_mode(env); + sext = (int64_t)rsp >> (pg_mode & PG_MODE_LA57 ? 56 : 47); + if (sext != 0 && sext != -1) { + raise_exception_err(env, EXCP0C_STACK, 0); + } + + return rsp; } /* 64 bit interrupt */ diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 5627772e7c..e1b6d88683 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -22,39 +22,6 @@ #include "exec/exec-all.h" #include "tcg/helper-tcg.h" -int get_pg_mode(CPUX86State *env) -{ - int pg_mode = 0; - if (env->cr[0] & CR0_WP_MASK) { - pg_mode |= PG_MODE_WP; - } - if (env->cr[4] & CR4_PAE_MASK) { - pg_mode |= PG_MODE_PAE; - } - if (env->cr[4] & CR4_PSE_MASK) { - pg_mode |= PG_MODE_PSE; - } - if (env->cr[4] & CR4_PKE_MASK) { - pg_mode |= PG_MODE_PKE; - } - if (env->cr[4] & CR4_PKS_MASK) { - pg_mode |= PG_MODE_PKS; - } - if (env->cr[4] & CR4_SMEP_MASK) { - pg_mode |= PG_MODE_SMEP; - } - if (env->cr[4] & CR4_LA57_MASK) { - pg_mode |= PG_MODE_LA57; - } - if (env->hflags & HF_LMA_MASK) { - pg_mode |= PG_MODE_LMA; - } - if (env->efer & MSR_EFER_NXE) { - pg_mode |= PG_MODE_NXE; - } - return pg_mode; -} - #define PG_ERROR_OK (-1) typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type, @@ -279,9 +246,7 @@ do_check_protect_pse36: *prot |= PAGE_EXEC; } - if (!(pg_mode & PG_MODE_LMA)) { - pkr = 0; - } else if (ptep & PG_USER_MASK) { + if (ptep & PG_USER_MASK) { pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0; } else { pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0; @@ -344,8 +309,7 @@ do_check_protect_pse36: if (is_user) error_code |= PG_ERROR_U_MASK; if (is_write1 == 2 && - (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) || - (pg_mode & PG_MODE_SMEP))) + ((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP))) error_code |= PG_ERROR_I_D_MASK; return error_code; } diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index c7e25abf42..ecddf0cb91 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -256,6 +256,21 @@ static int whpx_set_tsc(CPUState *cpu) return 0; } +/* + * The CR8 register in the CPU is mapped to the TPR register of the APIC, + * however, they use a slightly different encoding. Specifically: + * + * APIC.TPR[bits 7:4] = CR8[bits 3:0] + * + * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64 + * and IA-32 Architectures Software Developer's Manual. + */ + +static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr) +{ + return tpr >> 4; +} + static void whpx_set_registers(CPUState *cpu, int level) { struct whpx_state *whpx = &whpx_global; @@ -284,7 +299,7 @@ static void whpx_set_registers(CPUState *cpu, int level) v86 = (env->eflags & VM_MASK); r86 = !(env->cr[0] & CR0_PE_MASK); - vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state)); vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); idx = 0; @@ -475,6 +490,17 @@ static void whpx_get_registers(CPUState *cpu) hr); } + if (whpx_apic_in_platform()) { + /* + * Fetch the TPR value from the emulated APIC. It may get overwritten + * below with the value from CR8 returned by + * WHvGetVirtualProcessorRegisters(). + */ + whpx_apic_get(x86_cpu->apic_state); + vcpu->tpr = whpx_apic_tpr_to_cr8( + cpu_get_apic_tpr(x86_cpu->apic_state)); + } + idx = 0; /* Indexes for first 16 registers match between HV and QEMU definitions */ @@ -604,6 +630,8 @@ static void whpx_get_registers(CPUState *cpu) whpx_apic_get(x86_cpu->apic_state); } + x86_update_hflags(env); + return; } diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c index ac61a96344..996e9f3bfe 100644 --- a/target/i386/xsave_helper.c +++ b/target/i386/xsave_helper.c @@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) memcpy(pkru, &env->pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); + } #endif } @@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) pkru = buf + e->offset; memcpy(&env->pkru, pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + const XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + const XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); + } #endif } |