aboutsummaryrefslogtreecommitdiff
path: root/target/i386/machine.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/i386/machine.c')
-rw-r--r-- target/i386/machine.c 284
1 files changed, 282 insertions, 2 deletions
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 4aff1a763f..851b249d1a 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -231,6 +231,50 @@ static int cpu_pre_save(void *opaque)
env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
}
+#ifdef CONFIG_KVM
+ /* Verify we have nested virtualization state from kernel if required */
+ if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) {
+ error_report("Guest enabled nested virtualization but kernel "
+ "does not support saving of nested state");
+ return -EINVAL;
+ }
+#endif
+
+ /*
+ * When the vCPU is running L2 and an exception is still pending,
+ * it can potentially be intercepted by the L1 hypervisor.
+ * This is in contrast to an injected exception, which cannot be
+ * intercepted anymore.
+ *
+ * Furthermore, when an L2 exception is intercepted by the L1
+ * hypervisor, its exception payload (CR2/DR6 on #PF/#DB)
+ * should not be set yet in the respective vCPU register.
+ * Thus, in case an exception is pending, it is
+ * important to save the exception payload separately.
+ *
+ * Therefore, if an exception is not in a pending state
+ * or the vCPU is not in guest-mode, it is not important to
+ * distinguish between a pending and an injected exception
+ * and we don't need to store the exception payload separately.
+ *
+ * In order to better preserve backwards-compatible migration,
+ * convert a pending exception to an injected exception in
+ * case it is not important to distinguish between them
+ * as described above.
+ */
+ if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) {
+ env->exception_pending = 0;
+ env->exception_injected = 1;
+
+ if (env->exception_has_payload) {
+ if (env->exception_nr == EXCP01_DB) {
+ env->dr[6] = env->exception_payload;
+ } else if (env->exception_nr == EXCP0E_PAGE) {
+ env->cr[2] = env->exception_payload;
+ }
+ }
+ }
+
return 0;
}
@@ -278,6 +322,33 @@ static int cpu_post_load(void *opaque, int version_id)
env->hflags &= ~HF_CPL_MASK;
env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+#ifdef CONFIG_KVM
+ if ((env->hflags & HF_GUEST_MASK) &&
+ (!env->nested_state ||
+ !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) {
+ error_report("vCPU set in guest-mode inconsistent with "
+ "migrated kernel nested state");
+ return -EINVAL;
+ }
+#endif
+
+ /*
+ * There are cases in which we can get a valid exception_nr with both
+ * exception_pending and exception_injected cleared.
+ * This can happen in one of the following scenarios:
+ * 1) Source is older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support.
+ * 2) Source is running on kernel without KVM_CAP_EXCEPTION_PAYLOAD support.
+ * 3) "cpu/exception_info" subsection not sent because there is no exception
+ * pending or guest wasn't running L2 (See comment in cpu_pre_save()).
+ *
+ * In those cases, we can just deduce that a valid exception_nr means
+ * we can treat the exception as already injected.
+ */
+ if ((env->exception_nr != -1) &&
+ !env->exception_pending && !env->exception_injected) {
+ env->exception_injected = 1;
+ }
+
env->fpstt = (env->fpus_vmstate >> 11) & 7;
env->fpus = env->fpus_vmstate & ~0x3800;
env->fptag_vmstate ^= 0xff;
@@ -323,6 +394,35 @@ static bool steal_time_msr_needed(void *opaque)
return cpu->env.steal_time_msr != 0;
}
+/*
+ * Tell whether the "cpu/exception_info" subsection has to be sent.
+ *
+ * The extra exception info only matters when a pending exception must be
+ * told apart from an injected one, which is only the case when an
+ * exception is pending while the vCPU is running L2 (HF_GUEST_MASK set
+ * in hflags). For more info, refer to the comment in cpu_pre_save().
+ */
+static bool exception_info_needed(void *opaque)
+{
+    X86CPU *x86_cpu = opaque;
+    CPUX86State *state = &x86_cpu->env;
+
+    /* No pending exception: nothing to distinguish. */
+    if (!state->exception_pending) {
+        return false;
+    }
+
+    return (state->hflags & HF_GUEST_MASK) != 0;
+}
+
+/*
+ * Optional "cpu/exception_info" subsection of the x86 CPU state.
+ *
+ * Carries the pending/injected exception flags and the exception
+ * payload. It is gated by exception_info_needed(). The order of the
+ * entries in .fields must not be changed casually, since it lays out
+ * the subsection's contents.
+ */
+static const VMStateDescription vmstate_exception_info = {
+ .name = "cpu/exception_info",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = exception_info_needed,
+ .fields = (VMStateField[]) {
+ /* Three 8-bit flags followed by the 64-bit payload value. */
+ VMSTATE_UINT8(env.exception_pending, X86CPU),
+ VMSTATE_UINT8(env.exception_injected, X86CPU),
+ VMSTATE_UINT8(env.exception_has_payload, X86CPU),
+ VMSTATE_UINT64(env.exception_payload, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_steal_time_msr = {
.name = "cpu/steal_time_msr",
.version_id = 1,
@@ -634,7 +734,7 @@ static bool hyperv_runtime_enable_needed(void *opaque)
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
- if (!cpu->hyperv_runtime) {
+ if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) {
return false;
}
@@ -851,6 +951,182 @@ static const VMStateDescription vmstate_tsc_khz = {
}
};
+#ifdef CONFIG_KVM
+
+/*
+ * The vmcs12 subsection is needed only when the recorded nested state
+ * size reaches past the offset at which data.vmx[0].vmcs12 starts.
+ */
+static bool vmx_vmcs12_needed(void *opaque)
+{
+    struct kvm_nested_state *state = opaque;
+    size_t vmcs12_offset = offsetof(struct kvm_nested_state,
+                                    data.vmx[0].vmcs12);
+
+    return state->size > vmcs12_offset;
+}
+
+/*
+ * Optional "cpu/kvm_nested_state/vmx/vmcs12" subsection.
+ *
+ * Transfers the data.vmx[0].vmcs12 byte array of struct kvm_nested_state
+ * (KVM_STATE_NESTED_VMX_VMCS_SIZE bytes); gated by vmx_vmcs12_needed().
+ */
+static const VMStateDescription vmstate_vmx_vmcs12 = {
+ .name = "cpu/kvm_nested_state/vmx/vmcs12",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vmx_vmcs12_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12,
+ struct kvm_nested_state,
+ KVM_STATE_NESTED_VMX_VMCS_SIZE),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * The shadow vmcs12 subsection is needed only when the recorded nested
+ * state size reaches past the offset at which data.vmx[0].shadow_vmcs12
+ * starts.
+ */
+static bool vmx_shadow_vmcs12_needed(void *opaque)
+{
+    struct kvm_nested_state *state = opaque;
+    size_t shadow_offset = offsetof(struct kvm_nested_state,
+                                    data.vmx[0].shadow_vmcs12);
+
+    return state->size > shadow_offset;
+}
+
+/*
+ * Optional "cpu/kvm_nested_state/vmx/shadow_vmcs12" subsection.
+ *
+ * Transfers the data.vmx[0].shadow_vmcs12 byte array of struct
+ * kvm_nested_state (KVM_STATE_NESTED_VMX_VMCS_SIZE bytes); gated by
+ * vmx_shadow_vmcs12_needed().
+ */
+static const VMStateDescription vmstate_vmx_shadow_vmcs12 = {
+ .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vmx_shadow_vmcs12_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12,
+ struct kvm_nested_state,
+ KVM_STATE_NESTED_VMX_VMCS_SIZE),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * The VMX nested state subsection is needed when the state is in VMX
+ * format and either vmxon_pa differs from -1ull or the SMM flags carry
+ * KVM_STATE_NESTED_SMM_VMXON.
+ * NOTE(review): -1ull presumably marks "no VMXON region" - confirm
+ * against the KVM API documentation.
+ */
+static bool vmx_nested_state_needed(void *opaque)
+{
+    struct kvm_nested_state *state = opaque;
+
+    if (state->format != KVM_STATE_NESTED_FORMAT_VMX) {
+        return false;
+    }
+
+    if (state->hdr.vmx.vmxon_pa != -1ull) {
+        return true;
+    }
+
+    return (state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) != 0;
+}
+
+/*
+ * Optional "cpu/kvm_nested_state/vmx" subsection.
+ *
+ * Transfers the VMX header of struct kvm_nested_state (vmxon_pa,
+ * vmcs12_pa and the SMM flags) and hangs the vmcs12 / shadow vmcs12
+ * byte arrays off it as further optional subsections. Gated by
+ * vmx_nested_state_needed().
+ */
+static const VMStateDescription vmstate_vmx_nested_state = {
+ .name = "cpu/kvm_nested_state/vmx",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vmx_nested_state_needed,
+ .fields = (VMStateField[]) {
+ /* Two 64-bit addresses followed by the 16-bit SMM flags. */
+ VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state),
+ VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state),
+ VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_vmx_vmcs12,
+ &vmstate_vmx_shadow_vmcs12,
+ NULL,
+ }
+};
+
+/*
+ * The SVM nested state subsection is needed exactly when the nested
+ * state is in SVM format.
+ */
+static bool svm_nested_state_needed(void *opaque)
+{
+    struct kvm_nested_state *state = opaque;
+    bool is_svm_format = (state->format == KVM_STATE_NESTED_FORMAT_SVM);
+
+    return is_svm_format;
+}
+
+/*
+ * Optional "cpu/kvm_nested_state/svm" subsection.
+ *
+ * Gated by svm_nested_state_needed(). The field list is empty, so the
+ * subsection carries no data of its own.
+ */
+static const VMStateDescription vmstate_svm_nested_state = {
+ .name = "cpu/kvm_nested_state/svm",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = svm_nested_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Tell whether the "cpu/nested_state" subsection has to be sent.
+ *
+ * It is needed when the vCPU has a nested state buffer and that buffer
+ * qualifies for either the VMX or the SVM subsection.
+ */
+static bool nested_state_needed(void *opaque)
+{
+    X86CPU *x86_cpu = opaque;
+    struct kvm_nested_state *state = x86_cpu->env.nested_state;
+
+    /* No nested state buffer at all: nothing to send. */
+    if (!state) {
+        return false;
+    }
+
+    return vmx_nested_state_needed(state) ||
+           svm_nested_state_needed(state);
+}
+
+/*
+ * Post-load hook of the "cpu/nested_state" section.
+ *
+ * Sanity-checks the nested state received from the migration stream:
+ *  - the kernel must report a positive maximum nested state length;
+ *  - the received size must lie between the fixed header size
+ *    (offset of the data member) and that maximum;
+ *  - the format must be either VMX or SVM.
+ *
+ * @opaque: the X86CPU whose env.nested_state was just loaded.
+ * @version_id: unused.
+ *
+ * Returns 0 when the state is acceptable, -EINVAL otherwise (an error
+ * is reported in that case).
+ */
+static int nested_state_post_load(void *opaque, int version_id)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+    struct kvm_nested_state *nested_state = env->nested_state;
+    int min_nested_state_len = offsetof(struct kvm_nested_state, data);
+    int max_nested_state_len = kvm_max_nested_state_length();
+
+    /*
+     * If our kernel doesn't support setting nested state
+     * and we have received nested state from the migration stream,
+     * we need to fail the migration.
+     */
+    if (max_nested_state_len <= 0) {
+        error_report("Received nested state when kernel cannot restore it");
+        return -EINVAL;
+    }
+
+    /*
+     * Verify that the size of the received nested_state struct
+     * at least covers the required header and is not larger
+     * than the max size that our kernel supports.
+     *
+     * The size member is an unsigned 32-bit value (it is serialized
+     * with VMSTATE_U32), hence the %u conversions below.
+     */
+    if (nested_state->size < min_nested_state_len) {
+        error_report("Received nested state size less than min: "
+                     "len=%u, min=%d",
+                     nested_state->size, min_nested_state_len);
+        return -EINVAL;
+    }
+    if (nested_state->size > max_nested_state_len) {
+        error_report("Received unsupported nested state size: "
+                     "nested_state->size=%u, max=%d",
+                     nested_state->size, max_nested_state_len);
+        return -EINVAL;
+    }
+
+    /* Verify format is valid */
+    if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) &&
+        (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) {
+        error_report("Received invalid nested state format: %d",
+                     nested_state->format);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Description of struct kvm_nested_state itself
+ * ("cpu/kvm_nested_state").
+ *
+ * Transfers the common header (flags, format, size) and attaches the
+ * format-specific VMX and SVM parts as optional subsections. It has no
+ * .needed callback of its own; it is referenced through
+ * VMSTATE_STRUCT_POINTER from vmstate_nested_state.
+ */
+static const VMStateDescription vmstate_kvm_nested_state = {
+ .name = "cpu/kvm_nested_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ /* 16-bit flags, 16-bit format, 32-bit total size. */
+ VMSTATE_U16(flags, struct kvm_nested_state),
+ VMSTATE_U16(format, struct kvm_nested_state),
+ VMSTATE_U32(size, struct kvm_nested_state),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_vmx_nested_state,
+ &vmstate_svm_nested_state,
+ NULL
+ }
+};
+
+/*
+ * Optional "cpu/nested_state" subsection of the x86 CPU state.
+ *
+ * Transfers the buffer that env.nested_state points to, using
+ * vmstate_kvm_nested_state as its layout. Gated by
+ * nested_state_needed(); nested_state_post_load() validates what was
+ * received after loading.
+ */
+static const VMStateDescription vmstate_nested_state = {
+ .name = "cpu/nested_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = nested_state_needed,
+ .post_load = nested_state_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU,
+ vmstate_kvm_nested_state,
+ struct kvm_nested_state),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#endif
+
static bool mcg_ext_ctl_needed(void *opaque)
{
X86CPU *cpu = opaque;
@@ -1056,7 +1332,7 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_INT32(env.interrupt_injected, X86CPU),
VMSTATE_UINT32(env.mp_state, X86CPU),
VMSTATE_UINT64(env.tsc, X86CPU),
- VMSTATE_INT32(env.exception_injected, X86CPU),
+ VMSTATE_INT32(env.exception_nr, X86CPU),
VMSTATE_UINT8(env.soft_interrupt, X86CPU),
VMSTATE_UINT8(env.nmi_injected, X86CPU),
VMSTATE_UINT8(env.nmi_pending, X86CPU),
@@ -1080,6 +1356,7 @@ VMStateDescription vmstate_x86_cpu = {
/* The above list is not sorted /wrt version numbers, watch out! */
},
.subsections = (const VMStateDescription*[]) {
+ &vmstate_exception_info,
&vmstate_async_pf_msr,
&vmstate_pv_eoi_msr,
&vmstate_steal_time_msr,
@@ -1113,6 +1390,9 @@ VMStateDescription vmstate_x86_cpu = {
#ifndef TARGET_X86_64
&vmstate_efer32,
#endif
+#ifdef CONFIG_KVM
+ &vmstate_nested_state,
+#endif
NULL
}
};