diff options
Diffstat (limited to 'target/i386/machine.c')
-rw-r--r-- | target/i386/machine.c | 284 |
1 files changed, 282 insertions, 2 deletions
diff --git a/target/i386/machine.c b/target/i386/machine.c index 4aff1a763f..851b249d1a 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -231,6 +231,50 @@ static int cpu_pre_save(void *opaque) env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); } +#ifdef CONFIG_KVM + /* Verify we have nested virtualization state from kernel if required */ + if (kvm_enabled() && cpu_has_vmx(env) && !env->nested_state) { + error_report("Guest enabled nested virtualization but kernel " + "does not support saving of nested state"); + return -EINVAL; + } +#endif + + /* + * When vCPU is running L2 and exception is still pending, + * it can potentially be intercepted by L1 hypervisor. + * In contrast to an injected exception which cannot be + * intercepted anymore. + * + * Furthermore, when a L2 exception is intercepted by L1 + * hypervisor, it's exception payload (CR2/DR6 on #PF/#DB) + * should not be set yet in the respective vCPU register. + * Thus, in case an exception is pending, it is + * important to save the exception payload seperately. + * + * Therefore, if an exception is not in a pending state + * or vCPU is not in guest-mode, it is not important to + * distinguish between a pending and injected exception + * and we don't need to store seperately the exception payload. + * + * In order to preserve better backwards-compatabile migration, + * convert a pending exception to an injected exception in + * case it is not important to distingiush between them + * as described above. + */ + if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) { + env->exception_pending = 0; + env->exception_injected = 1; + + if (env->exception_has_payload) { + if (env->exception_nr == EXCP01_DB) { + env->dr[6] = env->exception_payload; + } else if (env->exception_nr == EXCP0E_PAGE) { + env->cr[2] = env->exception_payload; + } + } + } + return 0; } @@ -278,6 +322,33 @@ static int cpu_post_load(void *opaque, int version_id) env->hflags &= ~HF_CPL_MASK; env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; +#ifdef CONFIG_KVM + if ((env->hflags & HF_GUEST_MASK) && + (!env->nested_state || + !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) { + error_report("vCPU set in guest-mode inconsistent with " + "migrated kernel nested state"); + return -EINVAL; + } +#endif + + /* + * There are cases that we can get valid exception_nr with both + * exception_pending and exception_injected being cleared. + * This can happen in one of the following scenarios: + * 1) Source is older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support. + * 2) Source is running on kernel without KVM_CAP_EXCEPTION_PAYLOAD support. + * 3) "cpu/exception_info" subsection not sent because there is no exception + * pending or guest wasn't running L2 (See comment in cpu_pre_save()). + * + * In those cases, we can just deduce that a valid exception_nr means + * we can treat the exception as already injected. + */ + if ((env->exception_nr != -1) && + !env->exception_pending && !env->exception_injected) { + env->exception_injected = 1; + } + env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; env->fptag_vmstate ^= 0xff; @@ -323,6 +394,35 @@ static bool steal_time_msr_needed(void *opaque) return cpu->env.steal_time_msr != 0; } +static bool exception_info_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + /* + * It is important to save exception-info only in case + * we need to distingiush between a pending and injected + * exception. Which is only required in case there is a + * pending exception and vCPU is running L2. + * For more info, refer to comment in cpu_pre_save(). + */ + return env->exception_pending && (env->hflags & HF_GUEST_MASK); +} + +static const VMStateDescription vmstate_exception_info = { + .name = "cpu/exception_info", + .version_id = 1, + .minimum_version_id = 1, + .needed = exception_info_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.exception_pending, X86CPU), + VMSTATE_UINT8(env.exception_injected, X86CPU), + VMSTATE_UINT8(env.exception_has_payload, X86CPU), + VMSTATE_UINT64(env.exception_payload, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_steal_time_msr = { .name = "cpu/steal_time_msr", .version_id = 1, @@ -634,7 +734,7 @@ static bool hyperv_runtime_enable_needed(void *opaque) X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - if (!cpu->hyperv_runtime) { + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) { return false; } @@ -851,6 +951,182 @@ static const VMStateDescription vmstate_tsc_khz = { } }; +#ifdef CONFIG_KVM + +static bool vmx_vmcs12_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].vmcs12)); +} + +static const VMStateDescription vmstate_vmx_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_vmcs12_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_shadow_vmcs12_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12)); +} + +static const VMStateDescription vmstate_vmx_shadow_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_shadow_vmcs12_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + return ((nested_state->format == KVM_STATE_NESTED_FORMAT_VMX) && + ((nested_state->hdr.vmx.vmxon_pa != -1ull) || + (nested_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))); +} + +static const VMStateDescription vmstate_vmx_nested_state = { + .name = "cpu/kvm_nested_state/vmx", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_nested_state_needed, + .fields = (VMStateField[]) { + VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state), + VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state), + VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_vmx_vmcs12, + &vmstate_vmx_shadow_vmcs12, + NULL, + } +}; + +static bool svm_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM); +} + +static const VMStateDescription vmstate_svm_nested_state = { + .name = "cpu/kvm_nested_state/svm", + .version_id = 1, + .minimum_version_id = 1, + .needed = svm_nested_state_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +static bool nested_state_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return (env->nested_state && + (vmx_nested_state_needed(env->nested_state) || + svm_nested_state_needed(env->nested_state))); +} + +static int nested_state_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + struct kvm_nested_state *nested_state = env->nested_state; + int min_nested_state_len = offsetof(struct kvm_nested_state, data); + int max_nested_state_len = kvm_max_nested_state_length(); + + /* + * If our kernel don't support setting nested state + * and we have received nested state from migration stream, + * we need to fail migration + */ + if (max_nested_state_len <= 0) { + error_report("Received nested state when kernel cannot restore it"); + return -EINVAL; + } + + /* + * Verify that the size of received nested_state struct + * at least cover required header and is not larger + * than the max size that our kernel support + */ + if (nested_state->size < min_nested_state_len) { + error_report("Received nested state size less than min: " + "len=%d, min=%d", + nested_state->size, min_nested_state_len); + return -EINVAL; + } + if (nested_state->size > max_nested_state_len) { + error_report("Recieved unsupported nested state size: " + "nested_state->size=%d, max=%d", + nested_state->size, max_nested_state_len); + return -EINVAL; + } + + /* Verify format is valid */ + if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) && + (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) { + error_report("Received invalid nested state format: %d", + nested_state->format); + return -EINVAL; + } + + return 0; +} + +static const VMStateDescription vmstate_kvm_nested_state = { + .name = "cpu/kvm_nested_state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_U16(flags, struct kvm_nested_state), + VMSTATE_U16(format, struct kvm_nested_state), + VMSTATE_U32(size, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_vmx_nested_state, + &vmstate_svm_nested_state, + NULL + } +}; + +static const VMStateDescription vmstate_nested_state = { + .name = "cpu/nested_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = nested_state_needed, + .post_load = nested_state_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU, + vmstate_kvm_nested_state, + struct kvm_nested_state), + VMSTATE_END_OF_LIST() + } +}; + +#endif + static bool mcg_ext_ctl_needed(void *opaque) { X86CPU *cpu = opaque; @@ -1056,7 +1332,7 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_INT32(env.interrupt_injected, X86CPU), VMSTATE_UINT32(env.mp_state, X86CPU), VMSTATE_UINT64(env.tsc, X86CPU), - VMSTATE_INT32(env.exception_injected, X86CPU), + VMSTATE_INT32(env.exception_nr, X86CPU), VMSTATE_UINT8(env.soft_interrupt, X86CPU), VMSTATE_UINT8(env.nmi_injected, X86CPU), VMSTATE_UINT8(env.nmi_pending, X86CPU), @@ -1080,6 +1356,7 @@ VMStateDescription vmstate_x86_cpu = { /* The above list is not sorted /wrt version numbers, watch out! */ }, .subsections = (const VMStateDescription*[]) { + &vmstate_exception_info, &vmstate_async_pf_msr, &vmstate_pv_eoi_msr, &vmstate_steal_time_msr, @@ -1113,6 +1390,9 @@ VMStateDescription vmstate_x86_cpu = { #ifndef TARGET_X86_64 &vmstate_efer32, #endif +#ifdef CONFIG_KVM + &vmstate_nested_state, +#endif NULL } }; |