diff options
Diffstat (limited to 'target/i386/kvm/kvm.c')
-rw-r--r-- | target/i386/kvm/kvm.c | 125 |
1 files changed, 108 insertions, 17 deletions
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 83d0988302..ef2c68a6f4 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -17,6 +17,7 @@ #include "qapi/error.h" #include <sys/ioctl.h> #include <sys/utsname.h> +#include <sys/syscall.h> #include <linux/kvm.h> #include "standard-headers/asm-x86/kvm_para.h" @@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters; static uint32_t num_architectural_pmu_fixed_counters; static int has_xsave; +static int has_xsave2; static int has_xcrs; static int has_pit_state2; static int has_sregs2; @@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, struct kvm_cpuid2 *cpuid; uint32_t ret = 0; uint32_t cpuid_1_edx; + uint64_t bitmask; cpuid = get_supported_cpuid(s); @@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!has_msr_arch_capabs) { ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { + warn_report("cannot get sys attribute capabilities %d", sys_attr); + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { + warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " + "error: %d", rc); + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; } else if (function == 0x80000001 && reg == R_ECX) { /* * It's safe to enable TOPOEXT even if it's not returned by @@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker; #define KVM_MAX_CPUID_ENTRIES 100 +static void kvm_init_xsave(CPUX86State *env) +{ + if (has_xsave2) { + env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); + } else if (has_xsave) { + env->xsave_buf_len = sizeof(struct kvm_xsave); + } else { + return; + } + + env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); + memset(env->xsave_buf, 0, env->xsave_buf_len); + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state components. + */ + assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= + env->xsave_buf_len); +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = 0; + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + r = kvm_arch_set_tsc_khz(cs); if (r < 0) { return r; @@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; } break; - case 0x14: { + case 0x14: + case 0x1d: + case 0x1e: { uint32_t times; c->function = i; @@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (r) { goto fail; } - - if (has_xsave) { - env->xsave_buf_len = sizeof(struct kvm_xsave); - env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); - memset(env->xsave_buf, 0, env->xsave_buf_len); - - /* - * The allocated storage must be large enough for all of the - * possible XSAVE state components. - */ - assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) - <= env->xsave_buf_len); - } + kvm_init_xsave(env); max_nested_state_len = kvm_max_nested_state_length(); if (max_nested_state_len > 0) { @@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) env->msr_ia32_sgxlepubkeyhash[3]); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, + env->msr_xfd); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, + env->msr_xfd_err); + } + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ } @@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu) { CPUX86State *env = &cpu->env; void *xsave = env->xsave_buf; - int ret; + int type, ret; if (!has_xsave) { return kvm_get_fpu(cpu); } - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); + type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE; + ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); if (ret < 0) { return ret; } @@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); } + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); + } + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); if (ret < 0) { return ret; @@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu) env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = msrs[i].data; break; + case MSR_IA32_XFD: + env->msr_xfd = msrs[i].data; + break; + case MSR_IA32_XFD_ERR: + env->msr_xfd_err = msrs[i].data; + break; } } @@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s) kvm_gsi_routing_allowed = true; if (kvm_irqchip_is_split()) { + KVMRouteChange c = kvm_irqchip_begin_route_changes(s); int i; /* If the ioapic is in QEMU and the lapics are in KVM, reserve MSI routes for signaling interrupts to the local apics. */ for (i = 0; i < IOAPIC_NUM_PINS; i++) { - if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { + if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) { error_report("Could not enable split IRQ mode."); exit(1); } } + kvm_irqchip_commit_route_changes(&c); } } @@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void) { return !sev_es_enabled(); } + +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +{ + KVMState *s = kvm_state; + uint64_t supported; + + mask &= XSTATE_DYNAMIC_MASK; + if (!mask) { + return; + } + /* + * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. + * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned + * about them already because they are not supported features. + */ + supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); + supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; + mask &= supported; + + while (mask) { + int bit = ctz64(mask); + int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + if (rc) { + /* + * Older kernel version (<5.17) do not support + * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return + * any dynamic feature from kvm_arch_get_supported_cpuid. + */ + warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " + "for feature bit %d", bit); + } + mask &= ~BIT_ULL(bit); + } +} |