aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/hyperv.txt9
-rw-r--r--hw/core/machine.c3
-rw-r--r--hw/core/numa.c45
-rw-r--r--hw/i386/pc.c1
-rw-r--r--target/i386/cpu.c21
-rw-r--r--target/i386/cpu.h3
-rw-r--r--target/i386/kvm/hyperv-proto.h6
-rw-r--r--target/i386/kvm/kvm-stub.c5
-rw-r--r--target/i386/kvm/kvm.c189
-rw-r--r--target/i386/kvm/kvm_i386.h1
10 files changed, 172 insertions, 111 deletions
diff --git a/docs/hyperv.txt b/docs/hyperv.txt
index e53c581f45..000638a2fd 100644
--- a/docs/hyperv.txt
+++ b/docs/hyperv.txt
@@ -170,7 +170,7 @@ Recommended: hv-frequencies
3.16. hv-evmcs
===============
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
-enabled, it provides Enlightened VMCS feature to the guest. The feature
+enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature
implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V)
hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only.
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
@@ -209,8 +209,11 @@ In some cases (e.g. during development) it may make sense to use QEMU in
'pass-through' mode and give Windows guests all enlightenments currently
supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU
flag.
-Note: enabling this flag effectively prevents migration as supported features
-may differ between target and destination.
+Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU
+(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id="
+values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on
+the command line. Also, enabling this flag effectively prevents migration as the
+list of enabled enlightenments may differ between source and destination hosts.
4. Useful links
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 57c18f909a..6f59fb0b7f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -728,7 +728,8 @@ void machine_set_cpu_numa_node(MachineState *machine,
if ((numa_info[props->node_id].initiator < MAX_NODES) &&
(props->node_id != numa_info[props->node_id].initiator)) {
error_setg(errp, "The initiator of CPU NUMA node %" PRId64
- " should be itself", props->node_id);
+ " should be itself (got %" PRIu16 ")",
+ props->node_id, numa_info[props->node_id].initiator);
return;
}
numa_info[props->node_id].has_cpu = true;
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 1058d3697b..510d096a88 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -88,6 +88,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
return;
}
+ /*
+ * If the initiator is not set, set it to MAX_NODES. And if
+ * HMAT is enabled and this node has no CPUs, QEMU will raise an error.
+ */
+ numa_info[nodenr].initiator = MAX_NODES;
+ if (node->has_initiator) {
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ if (node->initiator >= MAX_NODES) {
+ error_report("The initiator id %" PRIu16 " expects an integer "
+ "between 0 and %d", node->initiator,
+ MAX_NODES - 1);
+ return;
+ }
+
+ numa_info[nodenr].initiator = node->initiator;
+ }
+
for (cpus = node->cpus; cpus; cpus = cpus->next) {
CpuInstanceProperties props;
if (cpus->value >= max_cpus) {
@@ -142,28 +165,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
}
- /*
- * If not set the initiator, set it to MAX_NODES. And if
- * HMAT is enabled and this node has no cpus, QEMU will raise error.
- */
- numa_info[nodenr].initiator = MAX_NODES;
- if (node->has_initiator) {
- if (!ms->numa_state->hmat_enabled) {
- error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
- "(HMAT) is disabled, enable it with -machine hmat=on "
- "before using any of hmat specific options");
- return;
- }
-
- if (node->initiator >= MAX_NODES) {
- error_report("The initiator id %" PRIu16 " expects an integer "
- "between 0 and %d", node->initiator,
- MAX_NODES - 1);
- return;
- }
-
- numa_info[nodenr].initiator = node->initiator;
- }
numa_info[nodenr].present = true;
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
ms->numa_state->num_nodes++;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8e1220db72..aa79c5e0e6 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_0[] = {
{ "qemu64" "-" TYPE_X86_CPU, "family", "6" },
{ "qemu64" "-" TYPE_X86_CPU, "model", "6" },
{ "qemu64" "-" TYPE_X86_CPU, "stepping", "3" },
+ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" },
};
const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5f595a0d7e..48b55ebd0a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5155,6 +5155,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (cpu->cache_info_passthrough) {
host_cpuid(index, 0, eax, ebx, ecx, edx);
break;
+ } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
}
*eax = 1; /* Number of CPUID[EAX=2] calls required */
*ebx = 0;
@@ -5176,6 +5179,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if ((*eax & 31) && cs->nr_cores > 1) {
*eax |= (cs->nr_cores - 1) << 26;
}
+ } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
+ *eax = *ebx = *ecx = *edx = 0;
} else {
*eax = 0;
switch (count) {
@@ -5945,8 +5950,15 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
- /* CPU topology with multi-dies support requires CPUID[0x1F] */
- if (env->nr_dies > 1) {
+ /*
+ * Intel CPU topology with multi-dies support requires CPUID[0x1F].
+ * For AMD Rome/Milan, cpuid level is 0x10, and guest OS should detect
+ * extended topology by leaf 0xB. Only adjust it for Intel CPU, unless
+ * cpu->vendor_cpuid_only has been unset for compatibility with older
+ * machine types.
+ */
+ if ((env->nr_dies > 1) &&
+ (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
}
@@ -5974,6 +5986,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
if (env->cpuid_xlevel2 == UINT32_MAX) {
env->cpuid_xlevel2 = env->cpuid_min_xlevel2;
}
+
+ if (kvm_enabled()) {
+ kvm_hyperv_expand_features(cpu, errp);
+ }
}
/*
@@ -6647,6 +6663,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true),
DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
+ DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true),
DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8f3747dd28..950a991a71 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1748,6 +1748,9 @@ struct X86CPU {
/* Enable auto level-increase for all CPUID leaves */
bool full_cpuid_auto_level;
+ /* Only advertise CPUID leaves defined by the vendor */
+ bool vendor_cpuid_only;
+
/* Enable auto level-increase for Intel Processor Trace leave */
bool intel_pt_auto_level;
diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h
index e30d64b4ad..5fbb385cc1 100644
--- a/target/i386/kvm/hyperv-proto.h
+++ b/target/i386/kvm/hyperv-proto.h
@@ -39,6 +39,12 @@
#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13)
/*
+ * HV_CPUID_FEATURES.EBX bits
+ */
+#define HV_POST_MESSAGES (1u << 4)
+#define HV_SIGNAL_EVENTS (1u << 5)
+
+/*
* HV_CPUID_FEATURES.EDX bits
*/
#define HV_MWAIT_AVAILABLE (1u << 0)
diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c
index 92f49121b8..f6e7e4466e 100644
--- a/target/i386/kvm/kvm-stub.c
+++ b/target/i386/kvm/kvm-stub.c
@@ -39,3 +39,8 @@ bool kvm_hv_vpindex_settable(void)
{
return false;
}
+
+bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
+{
+ abort();
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a85035492f..59ed8327ac 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -813,8 +813,6 @@ static struct {
[HYPERV_FEAT_RELAXED] = {
.desc = "relaxed timing (hv-relaxed)",
.flags = {
- {.func = HV_CPUID_FEATURES, .reg = R_EAX,
- .bits = HV_HYPERCALL_AVAILABLE},
{.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
.bits = HV_RELAXED_TIMING_RECOMMENDED}
}
@@ -823,7 +821,7 @@ static struct {
.desc = "virtual APIC (hv-vapic)",
.flags = {
{.func = HV_CPUID_FEATURES, .reg = R_EAX,
- .bits = HV_HYPERCALL_AVAILABLE | HV_APIC_ACCESS_AVAILABLE},
+ .bits = HV_APIC_ACCESS_AVAILABLE},
{.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
.bits = HV_APIC_ACCESS_RECOMMENDED}
}
@@ -832,8 +830,7 @@ static struct {
.desc = "clocksources (hv-time)",
.flags = {
{.func = HV_CPUID_FEATURES, .reg = R_EAX,
- .bits = HV_HYPERCALL_AVAILABLE | HV_TIME_REF_COUNT_AVAILABLE |
- HV_REFERENCE_TSC_AVAILABLE}
+ .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE}
}
},
[HYPERV_FEAT_CRASH] = {
@@ -1148,16 +1145,12 @@ static bool hyperv_feature_supported(CPUState *cs, int feature)
return true;
}
-static int hv_cpuid_check_and_set(CPUState *cs, int feature, Error **errp)
+/* Checks that all feature dependencies are enabled */
+static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp)
{
- X86CPU *cpu = X86_CPU(cs);
uint64_t deps;
int dep_feat;
- if (!hyperv_feat_enabled(cpu, feature) && !cpu->hyperv_passthrough) {
- return 0;
- }
-
deps = kvm_hyperv_properties[feature].dependencies;
while (deps) {
dep_feat = ctz64(deps);
@@ -1165,26 +1158,12 @@ static int hv_cpuid_check_and_set(CPUState *cs, int feature, Error **errp)
error_setg(errp, "Hyper-V %s requires Hyper-V %s",
kvm_hyperv_properties[feature].desc,
kvm_hyperv_properties[dep_feat].desc);
- return 1;
+ return false;
}
deps &= ~(1ull << dep_feat);
}
- if (!hyperv_feature_supported(cs, feature)) {
- if (hyperv_feat_enabled(cpu, feature)) {
- error_setg(errp, "Hyper-V %s is not supported by kernel",
- kvm_hyperv_properties[feature].desc);
- return 1;
- } else {
- return 0;
- }
- }
-
- if (cpu->hyperv_passthrough) {
- cpu->hyperv_features |= BIT(feature);
- }
-
- return 0;
+ return true;
}
static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
@@ -1220,12 +1199,23 @@ static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
* of 'hv_passthrough' mode and fills the environment with all supported
* Hyper-V features.
*/
-static void hyperv_expand_features(CPUState *cs, Error **errp)
+bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
{
- X86CPU *cpu = X86_CPU(cs);
+ CPUState *cs = CPU(cpu);
+ Error *local_err = NULL;
+ int feat;
if (!hyperv_enabled(cpu))
- return;
+ return true;
+
+ /*
+ * When kvm_hyperv_expand_features is called at CPU feature expansion
+ * time per-CPU kvm_state is not available yet so we can only proceed
+ * when KVM_CAP_SYS_HYPERV_CPUID is supported.
+ */
+ if (!cs->kvm_state &&
+ !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID))
+ return true;
if (cpu->hyperv_passthrough) {
cpu->hyperv_vendor_id[0] =
@@ -1269,53 +1259,37 @@ static void hyperv_expand_features(CPUState *cs, Error **errp)
cpu->hyperv_spinlock_attempts =
hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX);
- }
- /* Features */
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RELAXED, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_VAPIC, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_TIME, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_CRASH, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RESET, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_VPINDEX, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RUNTIME, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_SYNIC, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_STIMER, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_FREQUENCIES, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_REENLIGHTENMENT, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_TLBFLUSH, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_EVMCS, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_IPI, errp)) {
- return;
- }
- if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_STIMER_DIRECT, errp)) {
- return;
+ /*
+ * Mark feature as enabled in 'cpu->hyperv_features' as
+ * hv_build_cpuid_leaf() uses this info to build guest CPUIDs.
+ */
+ for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
+ if (hyperv_feature_supported(cs, feat)) {
+ cpu->hyperv_features |= BIT(feat);
+ }
+ }
+ } else {
+ /* Check features availability and dependencies */
+ for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
+ /* If the feature was not requested skip it. */
+ if (!hyperv_feat_enabled(cpu, feat)) {
+ continue;
+ }
+
+ /* Check if the feature is supported by KVM */
+ if (!hyperv_feature_supported(cs, feat)) {
+ error_setg(errp, "Hyper-V %s is not supported by kernel",
+ kvm_hyperv_properties[feat].desc);
+ return false;
+ }
+
+ /* Check dependencies */
+ if (!hv_feature_check_deps(cpu, feat, &local_err)) {
+ error_propagate(errp, local_err);
+ return false;
+ }
+ }
}
/* Additional dependencies not covered by kvm_hyperv_properties[] */
@@ -1325,7 +1299,10 @@ static void hyperv_expand_features(CPUState *cs, Error **errp)
error_setg(errp, "Hyper-V %s requires Hyper-V %s",
kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc,
kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc);
+ return false;
}
+
+ return true;
}
/*
@@ -1366,6 +1343,15 @@ static int hyperv_fill_cpuids(CPUState *cs,
c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX);
c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX);
+ /* Unconditionally required with any Hyper-V enlightenment */
+ c->eax |= HV_HYPERCALL_AVAILABLE;
+
+ /* SynIC and Vmbus devices require messages/signals hypercalls */
+ if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
+ !cpu->hyperv_synic_kvm_only) {
+ c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS;
+ }
+
/* Not exposed by KVM but needed to make CPU hotplug in Windows work */
c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
@@ -1409,6 +1395,21 @@ static int hyperv_fill_cpuids(CPUState *cs,
static Error *hv_passthrough_mig_blocker;
static Error *hv_no_nonarch_cs_mig_blocker;
+/* Checks that the exposed eVMCS version range is supported by KVM */
+static bool evmcs_version_supported(uint16_t evmcs_version,
+ uint16_t supported_evmcs_version)
+{
+ uint8_t min_version = evmcs_version & 0xff;
+ uint8_t max_version = evmcs_version >> 8;
+ uint8_t min_supported_version = supported_evmcs_version & 0xff;
+ uint8_t max_supported_version = supported_evmcs_version >> 8;
+
+ return (min_version >= min_supported_version) &&
+ (max_version <= max_supported_version);
+}
+
+#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
+
static int hyperv_init_vcpu(X86CPU *cpu)
{
CPUState *cs = CPU(cpu);
@@ -1488,17 +1489,33 @@ static int hyperv_init_vcpu(X86CPU *cpu)
}
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
- uint16_t evmcs_version;
+ uint16_t evmcs_version = DEFAULT_EVMCS_VERSION;
+ uint16_t supported_evmcs_version;
ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
- (uintptr_t)&evmcs_version);
+ (uintptr_t)&supported_evmcs_version);
+ /*
+ * KVM is required to support EVMCS ver.1 as that's what 'hv-evmcs'
+ * option sets. Note: we hardcode the maximum supported eVMCS version
+ * to '1' as well so 'hv-evmcs' feature is migratable even when (and if)
+ * ver.2 is implemented. A new option (e.g. 'hv-evmcs=2') will then have
+ * to be added.
+ */
if (ret < 0) {
- fprintf(stderr, "Hyper-V %s is not supported by kernel\n",
- kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
+ error_report("Hyper-V %s is not supported by kernel",
+ kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
return ret;
}
+ if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) {
+ error_report("eVMCS version range [%d..%d] is not supported by "
+ "kernel (supported: [%d..%d])", evmcs_version & 0xff,
+ evmcs_version >> 8, supported_evmcs_version & 0xff,
+ supported_evmcs_version >> 8);
+ return -ENOTSUP;
+ }
+
cpu->hyperv_nested[0] = evmcs_version;
}
@@ -1559,9 +1576,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
- /* Paravirtualization CPUIDs */
- hyperv_expand_features(cs, &local_err);
- if (local_err) {
+ /*
+ * kvm_hyperv_expand_features() is called here for the second time in case
+ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
+ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
+ * check which Hyper-V enlightenments are supported and which are not, we
+ * can still proceed and check/expand Hyper-V enlightenments here so legacy
+ * behavior is preserved.
+ */
+ if (!kvm_hyperv_expand_features(cpu, &local_err)) {
error_report_err(local_err);
return -ENOSYS;
}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index dc72508389..54667b35f0 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -47,6 +47,7 @@ bool kvm_has_x2apic_api(void);
bool kvm_has_waitpkg(void);
bool kvm_hv_vpindex_settable(void);
+bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);