about | summary | refs | log | tree | commit | diff
path: root/target/i386/kvm/kvm.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/i386/kvm/kvm.c')
-rw-r--r-- target/i386/kvm/kvm.c 125
1 files changed, 108 insertions, 17 deletions
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 83d0988302..ef2c68a6f4 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -17,6 +17,7 @@
#include "qapi/error.h"
#include <sys/ioctl.h>
#include <sys/utsname.h>
+#include <sys/syscall.h>
#include <linux/kvm.h>
#include "standard-headers/asm-x86/kvm_para.h"
@@ -123,6 +124,7 @@ static uint32_t num_architectural_pmu_gp_counters;
static uint32_t num_architectural_pmu_fixed_counters;
static int has_xsave;
+static int has_xsave2;
static int has_xcrs;
static int has_pit_state2;
static int has_sregs2;
@@ -349,6 +351,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
struct kvm_cpuid2 *cpuid;
uint32_t ret = 0;
uint32_t cpuid_1_edx;
+ uint64_t bitmask;
cpuid = get_supported_cpuid(s);
@@ -406,6 +409,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
if (!has_msr_arch_capabs) {
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
}
+ } else if (function == 0xd && index == 0 &&
+ (reg == R_EAX || reg == R_EDX)) {
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask
+ };
+
+ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+ if (!sys_attr) {
+ warn_report("cannot get sys attribute capabilities %d", sys_attr);
+ }
+
+ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+ "error: %d", rc);
+ }
+ ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
} else if (function == 0x80000001 && reg == R_ECX) {
/*
* It's safe to enable TOPOEXT even if it's not returned by
@@ -1566,6 +1588,26 @@ static Error *invtsc_mig_blocker;
#define KVM_MAX_CPUID_ENTRIES 100
+/*
+ * Size, allocate and zero env->xsave_buf.
+ *
+ * When has_xsave2 is non-zero its value is taken as the required size
+ * and rounded up to a multiple of 4096; otherwise, when has_xsave is
+ * set, the buffer is sizeof(struct kvm_xsave).  When neither flag is
+ * set, env is left untouched.
+ */
+static void kvm_init_xsave(CPUX86State *env)
+{
+    if (!has_xsave2 && !has_xsave) {
+        /* No XSAVE capability recorded: nothing to allocate. */
+        return;
+    }
+
+    if (has_xsave2) {
+        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
+    } else {
+        env->xsave_buf_len = sizeof(struct kvm_xsave);
+    }
+
+    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+    memset(env->xsave_buf, 0, env->xsave_buf_len);
+
+    /*
+     * Sanity check: the buffer has to be able to hold every XSAVE
+     * state component that may be enabled, i.e. it must be at least
+     * as large as the size reported in CPUID[EAX=0xD,ECX=0].ECX.
+     */
+    assert(env->xsave_buf_len >=
+           kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX));
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -1595,6 +1637,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpuid_i = 0;
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
r = kvm_arch_set_tsc_khz(cs);
if (r < 0) {
return r;
@@ -1760,7 +1804,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
c = &cpuid_data.entries[cpuid_i++];
}
break;
- case 0x14: {
+ case 0x14:
+ case 0x1d:
+ case 0x1e: {
uint32_t times;
c->function = i;
@@ -1982,19 +2028,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (r) {
goto fail;
}
-
- if (has_xsave) {
- env->xsave_buf_len = sizeof(struct kvm_xsave);
- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
- memset(env->xsave_buf, 0, env->xsave_buf_len);
-
- /*
- * The allocated storage must be large enough for all of the
- * possible XSAVE state components.
- */
- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
- <= env->xsave_buf_len);
- }
+ kvm_init_xsave(env);
max_nested_state_len = kvm_max_nested_state_length();
if (max_nested_state_len > 0) {
@@ -3243,6 +3277,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
env->msr_ia32_sgxlepubkeyhash[3]);
}
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD,
+ env->msr_xfd);
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
+ env->msr_xfd_err);
+ }
+
/* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
* kvm_put_msr_feature_control. */
}
@@ -3298,13 +3339,14 @@ static int kvm_get_xsave(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
void *xsave = env->xsave_buf;
- int ret;
+ int type, ret;
if (!has_xsave) {
return kvm_get_fpu(cpu);
}
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
+ type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
+ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
if (ret < 0) {
return ret;
}
@@ -3634,6 +3676,11 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
}
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
+ }
+
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
if (ret < 0) {
return ret;
@@ -3930,6 +3977,12 @@ static int kvm_get_msrs(X86CPU *cpu)
env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
msrs[i].data;
break;
+ case MSR_IA32_XFD:
+ env->msr_xfd = msrs[i].data;
+ break;
+ case MSR_IA32_XFD_ERR:
+ env->msr_xfd_err = msrs[i].data;
+ break;
}
}
@@ -4940,16 +4993,18 @@ void kvm_arch_init_irq_routing(KVMState *s)
kvm_gsi_routing_allowed = true;
if (kvm_irqchip_is_split()) {
+ KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
int i;
/* If the ioapic is in QEMU and the lapics are in KVM, reserve
MSI routes for signaling interrupts to the local apics. */
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) {
+ if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) {
error_report("Could not enable split IRQ mode.");
exit(1);
}
}
+ kvm_irqchip_commit_route_changes(&c);
}
}
@@ -5149,3 +5204,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
{
return !sev_es_enabled();
}
+
+/* Option code handed to the arch_prctl system call below. */
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+
+/*
+ * Issue one ARCH_REQ_XCOMP_GUEST_PERM arch_prctl request for every bit
+ * of @mask that is both in XSTATE_DYNAMIC_MASK and reported as
+ * supported in CPUID[EAX=0xD,ECX=0] (EDX:EAX).
+ *
+ * A failing request only produces a warning; the remaining bits are
+ * still processed.  @cpu is not referenced by this function.
+ */
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+    KVMState *s = kvm_state;
+    uint64_t pending = mask & XSTATE_DYNAMIC_MASK;
+    uint64_t supported_lo, supported_hi;
+
+    if (pending == 0) {
+        return;
+    }
+
+    /*
+     * Bits missing from CPUID[EAX=0xD,ECX=0] are silently dropped:
+     * ARCH_REQ_XCOMP_GUEST_PERM would fail for them, and QEMU has
+     * already warned that those features are unsupported.
+     */
+    supported_lo = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+    supported_hi = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
+    pending &= supported_lo | (supported_hi << 32);
+
+    /* Walk the set bits from lowest to highest, clearing each in turn. */
+    for (; pending != 0; pending &= pending - 1) {
+        int bit = ctz64(pending);
+        int err = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+
+        if (err) {
+            /*
+             * Older kernel versions (<5.17) do not support
+             * ARCH_REQ_XCOMP_GUEST_PERM, but they also do not return
+             * any dynamic feature from kvm_arch_get_supported_cpuid.
+             */
+            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+                        "for feature bit %d", bit);
+        }
+    }
+}