diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-02-01 15:23:24 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-02-01 15:32:18 -0600 |
commit | b3a98367eec7b2d87acca54ef5e4de3b0e0a7ed5 (patch) | |
tree | 22b6121e6f81c09c7dd780ed211afef7b9ebb92d /target-i386 | |
parent | 9363ee31ab53fc0fd39fbe5936d9c00a2f4e54a4 (diff) | |
parent | c5999bfcfdf66390c98115044cb6fd174fbcf36d (diff) |
Merge remote branch 'qemu-kvm/uq/master' into staging
aliguori: fix build with !defined(KVM_CAP_ASYNC_PF)
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Diffstat (limited to 'target-i386')
-rw-r--r-- | target-i386/cpu.h | 9 | ||||
-rw-r--r-- | target-i386/cpuid.c | 5 | ||||
-rw-r--r-- | target-i386/helper.c | 97 | ||||
-rw-r--r-- | target-i386/kvm.c | 751 | ||||
-rw-r--r-- | target-i386/kvm_x86.h | 5 |
5 files changed, 523 insertions, 344 deletions
diff --git a/target-i386/cpu.h b/target-i386/cpu.h index f0c07cde3b..af701a4412 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -699,6 +699,10 @@ typedef struct CPUX86State { uint32_t smbase; int old_exception; /* exception in flight */ + /* KVM states, automatically cleared on reset */ + uint8_t nmi_injected; + uint8_t nmi_pending; + CPU_COMMON /* processor features (e.g. for CPUID insn) */ @@ -726,8 +730,6 @@ typedef struct CPUX86State { int32_t exception_injected; int32_t interrupt_injected; uint8_t soft_interrupt; - uint8_t nmi_injected; - uint8_t nmi_pending; uint8_t has_error_code; uint32_t sipi_vector; uint32_t cpuid_kvm_features; @@ -760,6 +762,7 @@ int cpu_x86_exec(CPUX86State *s); void cpu_x86_close(CPUX86State *s); void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg); void x86_cpudef_setup(void); +int cpu_x86_support_mca_broadcast(CPUState *env); int cpu_get_pic_interrupt(CPUX86State *s); /* MSDOS compatibility mode FPU exception support */ @@ -873,6 +876,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *ecx, uint32_t *edx); int cpu_x86_register (CPUX86State *env, const char *cpu_model); void cpu_clear_apic_feature(CPUX86State *env); +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); /* helper.c */ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 165045ec42..5382a283f5 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -103,9 +103,8 @@ typedef struct model_features_t { int check_cpuid = 0; int enforce_cpuid = 0; -static void host_cpuid(uint32_t function, uint32_t count, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #if defined(CONFIG_KVM) uint32_t vec[4]; diff --git a/target-i386/helper.c 
b/target-i386/helper.c index 25a3e36138..1217452e71 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -110,6 +110,32 @@ void cpu_x86_close(CPUX86State *env) qemu_free(env); } +static void cpu_x86_version(CPUState *env, int *family, int *model) +{ + int cpuver = env->cpuid_version; + + if (family == NULL || model == NULL) { + return; + } + + *family = (cpuver >> 8) & 0x0f; + *model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0x0f); +} + +/* Broadcast MCA signal for processor version 06H_EH and above */ +int cpu_x86_support_mca_broadcast(CPUState *env) +{ + int family = 0; + int model = 0; + + cpu_x86_version(env, &family, &model); + if ((family == 6 && model >= 14) || family > 6) { + return 1; + } + + return 0; +} + /***********************************************************/ /* x86 debug */ @@ -223,6 +249,9 @@ done: cpu_fprintf(f, "\n"); } +#define DUMP_CODE_BYTES_TOTAL 50 +#define DUMP_CODE_BYTES_BACKWARD 20 + void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -408,6 +437,24 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, " "); } } + if (flags & CPU_DUMP_CODE) { + target_ulong base = env->segs[R_CS].base + env->eip; + target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); + uint8_t code; + char codestr[3]; + + cpu_fprintf(f, "Code="); + for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { + if (cpu_memory_rw_debug(env, base - offs + i, &code, 1, 0) == 0) { + snprintf(codestr, sizeof(codestr), "%02x", code); + } else { + snprintf(codestr, sizeof(codestr), "??"); + } + cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", + i == offs ? "<" : "", codestr, i == offs ? ">" : ""); + } + cpu_fprintf(f, "\n"); + } } /***********************************************************/ @@ -1021,21 +1068,12 @@ static void breakpoint_handler(CPUState *env) /* This should come from sysemu.h - if we could include it here... 
*/ void qemu_system_reset_request(void); -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc) { uint64_t mcg_cap = cenv->mcg_cap; - unsigned bank_num = mcg_cap & 0xff; uint64_t *banks = cenv->mce_banks; - if (bank >= bank_num || !(status & MCI_STATUS_VAL)) - return; - - if (kvm_enabled()) { - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); - return; - } - /* * if MSR_MCG_CTL is not all 1s, the uncorrected error * reporting is disabled @@ -1076,6 +1114,45 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, } else banks[1] |= MCI_STATUS_OVER; } + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int broadcast) +{ + unsigned bank_num = cenv->mcg_cap & 0xff; + CPUState *env; + int flag = 0; + + if (bank >= bank_num || !(status & MCI_STATUS_VAL)) { + return; + } + + if (broadcast) { + if (!cpu_x86_support_mca_broadcast(cenv)) { + fprintf(stderr, "Current CPU does not support broadcast\n"); + return; + } + } + + if (kvm_enabled()) { + if (broadcast) { + flag |= MCE_BROADCAST; + } + + kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); + } else { + qemu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + if (broadcast) { + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (cenv == env) { + continue; + } + + qemu_inject_x86_mce(env, 1, 0xa000000000000000, 0, 0, 0); + } + } + } +} #endif /* !CONFIG_USER_ONLY */ static void mce_init(CPUX86State *cenv) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7dfc357e42..05010bbc38 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -54,9 +54,19 @@ #define BUS_MCEERR_AO 5 #endif -static int lm_capable_kernel; +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), + 
KVM_CAP_INFO(MP_STATE), + KVM_CAP_LAST_INFO +}; -#ifdef KVM_CAP_EXT_CPUID +static bool has_msr_star; +static bool has_msr_hsave_pa; +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) +static bool has_msr_async_pf_en; +#endif +static int lm_capable_kernel; static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) { @@ -91,10 +101,6 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, uint32_t ret = 0; uint32_t cpuid_1_edx; - if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) { - return -1U; - } - max = 1; while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) { max *= 2; @@ -138,46 +144,33 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, return ret; } -#else - -uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, - uint32_t index, int reg) -{ - return -1U; -} - -#endif - #ifdef CONFIG_KVM_PARA struct kvm_para_features { - int cap; - int feature; + int cap; + int feature; } para_features[] = { -#ifdef KVM_CAP_CLOCKSOURCE - { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, -#endif -#ifdef KVM_CAP_NOP_IO_DELAY - { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, -#endif -#ifdef KVM_CAP_PV_MMU - { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, -#endif + { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, + { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, + { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, #ifdef KVM_CAP_ASYNC_PF - { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, + { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, #endif - { -1, -1 } + { -1, -1 } }; static int get_para_features(CPUState *env) { - int i, features = 0; + int i, features = 0; - for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { - if (kvm_check_extension(env->kvm_state, para_features[i].cap)) - features |= (1 << para_features[i].feature); + for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { + if (kvm_check_extension(env->kvm_state, para_features[i].cap)) { + features |= (1 << para_features[i].feature); } - - return 
features; + } +#ifdef KVM_CAP_ASYNC_PF + has_msr_async_pf_en = features & (1 << KVM_FEATURE_ASYNC_PF); +#endif + return features; } #endif @@ -187,7 +180,7 @@ static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, { int r; - r = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_MCE); + r = kvm_check_extension(s, KVM_CAP_MCE); if (r > 0) { *max_banks = r; return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); @@ -219,7 +212,7 @@ static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) } /* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */ -static int kvm_mce_in_exception(CPUState *env) +static int kvm_mce_in_progress(CPUState *env) { struct kvm_msr_entry msr_mcg_status = { .index = MSR_MCG_STATUS, @@ -228,7 +221,8 @@ static int kvm_mce_in_exception(CPUState *env) r = kvm_get_msr(env, &msr_mcg_status, 1); if (r == -1 || r == 0) { - return -1; + fprintf(stderr, "Failed to get MCE status\n"); + return 0; } return !!(msr_mcg_status.data & MCG_STATUS_MCIP); } @@ -248,10 +242,7 @@ static void kvm_do_inject_x86_mce(void *_data) /* If there is an MCE exception being processed, ignore this SRAO MCE */ if ((data->env->mcg_cap & MCG_SER_P) && !(data->mce->status & MCI_STATUS_AR)) { - r = kvm_mce_in_exception(data->env); - if (r == -1) { - fprintf(stderr, "Failed to get MCE status\n"); - } else if (r) { + if (kvm_mce_in_progress(data->env)) { return; } } @@ -264,11 +255,30 @@ static void kvm_do_inject_x86_mce(void *_data) } } } + +static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, + int flag) +{ + struct kvm_x86_mce_data data = { + .env = env, + .mce = mce, + .abort_on_error = (flag & ABORT_ON_ERROR), + }; + + if (!env->mcg_cap) { + fprintf(stderr, "MCE support is not enabled!\n"); + return; + } + + run_on_cpu(env, kvm_do_inject_x86_mce, &data); +} + +static void kvm_mce_broadcast_rest(CPUState *env); #endif void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, 
uint64_t misc, - int abort_on_error) + int flag) { #ifdef KVM_CAP_MCE struct kvm_x86_mce mce = { @@ -278,20 +288,16 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, .addr = addr, .misc = misc, }; - struct kvm_x86_mce_data data = { - .env = cenv, - .mce = &mce, - }; - if (!cenv->mcg_cap) { - fprintf(stderr, "MCE support is not enabled!\n"); - return; + if (flag & MCE_BROADCAST) { + kvm_mce_broadcast_rest(cenv); } - run_on_cpu(cenv, kvm_do_inject_x86_mce, &data); + kvm_inject_x86_mce_on(cenv, &mce, flag); #else - if (abort_on_error) + if (flag & ABORT_ON_ERROR) { abort(); + } #endif } @@ -304,12 +310,10 @@ int kvm_arch_init_vcpu(CPUState *env) uint32_t limit, i, j, cpuid_i; uint32_t unused; struct kvm_cpuid_entry2 *c; -#ifdef KVM_CPUID_SIGNATURE +#ifdef CONFIG_KVM_PARA uint32_t signature[3]; #endif - env->mp_state = KVM_MP_STATE_RUNNABLE; - env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX); i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; @@ -376,13 +380,15 @@ int kvm_arch_init_vcpu(CPUState *env) c->index = j; cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - if (i == 4 && c->eax == 0) + if (i == 4 && c->eax == 0) { break; - if (i == 0xb && !(c->ecx & 0xff00)) + } + if (i == 0xb && !(c->ecx & 0xff00)) { break; - if (i == 0xd && c->eax == 0) + } + if (i == 0xd && c->eax == 0) { break; - + } c = &cpuid_data.entries[cpuid_i++]; } break; @@ -412,17 +418,18 @@ int kvm_arch_init_vcpu(CPUState *env) uint64_t mcg_cap; int banks; - if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) + if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) { perror("kvm_get_mce_cap_supported FAILED"); - else { + } else { if (banks > MCE_BANKS_DEF) banks = MCE_BANKS_DEF; mcg_cap &= MCE_CAP_DEF; mcg_cap |= banks; - if (kvm_setup_mce(env, &mcg_cap)) + if (kvm_setup_mce(env, &mcg_cap)) { perror("kvm_setup_mce FAILED"); - else + } else { env->mcg_cap = mcg_cap; + } } } #endif @@ -434,8 +441,7 @@ void 
kvm_arch_reset_vcpu(CPUState *env) { env->exception_injected = -1; env->interrupt_injected = -1; - env->nmi_injected = 0; - env->nmi_pending = 0; + env->xcr0 = 1; if (kvm_irqchip_in_kernel()) { env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE : KVM_MP_STATE_UNINITIALIZED; @@ -444,13 +450,10 @@ void kvm_arch_reset_vcpu(CPUState *env) } } -int has_msr_star; -int has_msr_hsave_pa; - -static void kvm_supported_msrs(CPUState *env) +static int kvm_get_supported_msrs(KVMState *s) { static int kvm_supported_msrs; - int ret; + int ret = 0; /* first time */ if (kvm_supported_msrs == 0) { @@ -461,9 +464,9 @@ static void kvm_supported_msrs(CPUState *env) /* Obtain MSR list from KVM. These are the MSRs that we must * save/restore */ msr_list.nmsrs = 0; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); if (ret < 0 && ret != -E2BIG) { - return; + return ret; } /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. 
*/ @@ -472,17 +475,17 @@ static void kvm_supported_msrs(CPUState *env) sizeof(msr_list.indices[0]))); kvm_msr_list->nmsrs = msr_list.nmsrs; - ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); if (ret >= 0) { int i; for (i = 0; i < kvm_msr_list->nmsrs; i++) { if (kvm_msr_list->indices[i] == MSR_STAR) { - has_msr_star = 1; + has_msr_star = true; continue; } if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) { - has_msr_hsave_pa = 1; + has_msr_hsave_pa = true; continue; } } @@ -491,80 +494,61 @@ static void kvm_supported_msrs(CPUState *env) free(kvm_msr_list); } - return; -} - -static int kvm_has_msr_hsave_pa(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_hsave_pa; -} - -static int kvm_has_msr_star(CPUState *env) -{ - kvm_supported_msrs(env); - return has_msr_star; + return ret; } -static int kvm_init_identity_map_page(KVMState *s) +int kvm_arch_init(KVMState *s) { -#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + uint64_t identity_base = 0xfffbc000; int ret; - uint64_t addr = 0xfffbc000; - - if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { - return 0; - } + struct utsname utsname; - ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr); + ret = kvm_get_supported_msrs(s); if (ret < 0) { - fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret)); return ret; } -#endif - return 0; -} - -int kvm_arch_init(KVMState *s, int smp_cpus) -{ - int ret; - - struct utsname utsname; uname(&utsname); lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code - * directly. In order to use vm86 mode, a TSS is needed. Since this - * must be part of guest physical memory, we need to allocate it. Older - * versions of KVM just assumed that it would be at the end of physical - * memory but that doesn't work with more than 4GB of memory. We simply - * refuse to work with those older versions of KVM. 
*/ - ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); - if (ret <= 0) { - fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); - return ret; - } - - /* this address is 3 pages before the bios, and the bios should present - * as unavaible memory. FIXME, need to ensure the e820 map deals with - * this? - */ /* - * Tell fw_cfg to notify the BIOS to reserve the range. + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. + * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS. + * + * Older KVM versions may not support setting the identity map base. In + * that case we need to stick with the default, i.e. a 256K maximum BIOS + * size. */ - if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) { - perror("e820_add_entry() table is full"); - exit(1); +#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { + /* Allows up to 16M BIOSes. */ + identity_base = 0xfeffc000; + + ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + if (ret < 0) { + return ret; + } } - ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000); +#endif + /* Set TSS base one page after EPT identity map. */ + ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); if (ret < 0) { return ret; } - return kvm_init_identity_map_page(s); + /* Tell fw_cfg to notify the BIOS to reserve the range. 
ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); + if (ret < 0) { + fprintf(stderr, "e820_add_entry() table is full\n"); + return ret; + } + + return 0; } - + static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) { lhs->selector = rhs->selector; @@ -589,7 +573,7 @@ static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) lhs->limit = rhs->limit; lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; lhs->present = (flags & DESC_P_MASK) != 0; - lhs->dpl = rhs->selector & 3; + lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; lhs->db = (flags >> DESC_B_SHIFT) & 1; lhs->s = (flags & DESC_S_MASK) != 0; lhs->l = (flags >> DESC_L_SHIFT) & 1; @@ -603,23 +587,23 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) lhs->selector = rhs->selector; lhs->base = rhs->base; lhs->limit = rhs->limit; - lhs->flags = - (rhs->type << DESC_TYPE_SHIFT) - | (rhs->present * DESC_P_MASK) - | (rhs->dpl << DESC_DPL_SHIFT) - | (rhs->db << DESC_B_SHIFT) - | (rhs->s * DESC_S_MASK) - | (rhs->l << DESC_L_SHIFT) - | (rhs->g * DESC_G_MASK) - | (rhs->avl * DESC_AVL_MASK); + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + (rhs->present * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); } static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) { - if (set) + if (set) { *kvm_reg = *qemu_reg; - else + } else { *qemu_reg = *kvm_reg; + } } static int kvm_getput_regs(CPUState *env, int set) @@ -629,8 +613,9 @@ static int kvm_getput_regs(CPUState *env, int set) if (!set) { ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs); - if (ret < 0) + if (ret < 0) { return ret; + } } kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set); @@ -655,8 +640,9 @@ static int kvm_getput_regs(CPUState *env, int set) kvm_getput_reg(&regs.rflags, &env->eflags, set); kvm_getput_reg(&regs.rip, &env->eip, set); - if (set) + if (set) { ret = 
kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs); + } return ret; } @@ -670,8 +656,9 @@ static int kvm_put_fpu(CPUState *env) fpu.fsw = env->fpus & ~(7 << 11); fpu.fsw |= (env->fpstt & 7) << 11; fpu.fcw = env->fpuc; - for (i = 0; i < 8; ++i) - fpu.ftwx |= (!env->fptags[i]) << i; + for (i = 0; i < 8; ++i) { + fpu.ftwx |= (!env->fptags[i]) << i; + } memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs); fpu.mxcsr = env->mxcsr; @@ -696,8 +683,9 @@ static int kvm_put_xsave(CPUState *env) struct kvm_xsave* xsave; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_put_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); memset(xsave, 0, sizeof(struct kvm_xsave)); @@ -705,8 +693,9 @@ static int kvm_put_xsave(CPUState *env) swd = env->fpus & ~(7 << 11); swd |= (env->fpstt & 7) << 11; cwd = env->fpuc; - for (i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { twd |= (!env->fptags[i]) << i; + } xsave->region[0] = (uint32_t)(swd << 16) + cwd; xsave->region[1] = (uint32_t)(fop << 16) + twd; memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs, @@ -730,8 +719,9 @@ static int kvm_put_xcrs(CPUState *env) #ifdef KVM_CAP_XCRS struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } xcrs.nr_xcrs = 1; xcrs.flags = 0; @@ -754,26 +744,19 @@ static int kvm_put_sregs(CPUState *env) } if ((env->eflags & VM_MASK)) { - set_v8086_seg(&sregs.cs, &env->segs[R_CS]); - set_v8086_seg(&sregs.ds, &env->segs[R_DS]); - set_v8086_seg(&sregs.es, &env->segs[R_ES]); - set_v8086_seg(&sregs.fs, &env->segs[R_FS]); - set_v8086_seg(&sregs.gs, &env->segs[R_GS]); - set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); } else { - 
set_seg(&sregs.cs, &env->segs[R_CS]); - set_seg(&sregs.ds, &env->segs[R_DS]); - set_seg(&sregs.es, &env->segs[R_ES]); - set_seg(&sregs.fs, &env->segs[R_FS]); - set_seg(&sregs.gs, &env->segs[R_GS]); - set_seg(&sregs.ss, &env->segs[R_SS]); - - if (env->cr[0] & CR0_PE_MASK) { - /* force ss cpl to cs cpl */ - sregs.ss.selector = (sregs.ss.selector & ~3) | - (sregs.cs.selector & 3); - sregs.ss.dpl = sregs.ss.selector & 3; - } + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); } set_seg(&sregs.tr, &env->tr); @@ -816,10 +799,12 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - if (kvm_has_msr_star(env)) - kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - if (kvm_has_msr_hsave_pa(env)) + if (has_msr_star) { + kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); + } + if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -838,23 +823,35 @@ static int kvm_put_msrs(CPUState *env, int level) if (smp_cpus == 1 || env->tsc != 0) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); } + } + /* + * The following paravirtual MSRs have side effects on the guest or are + * too heavy for normal writeback. Limit them to reset or full state + * updates. 
+ */ + if (level >= KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); -#ifdef KVM_CAP_ASYNC_PF - kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) + if (has_msr_async_pf_en) { + kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN, + env->async_pf_en_msr); + } #endif } #ifdef KVM_CAP_MCE if (env->mcg_cap) { int i; - if (level == KVM_PUT_RESET_STATE) + + if (level == KVM_PUT_RESET_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); - else if (level == KVM_PUT_FULL_STATE) { + } else if (level == KVM_PUT_FULL_STATE) { kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); + } } } #endif @@ -872,14 +869,16 @@ static int kvm_get_fpu(CPUState *env) int i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu); - if (ret < 0) + if (ret < 0) { return ret; + } env->fpstt = (fpu.fsw >> 11) & 7; env->fpus = fpu.fsw; env->fpuc = fpu.fcw; - for (i = 0; i < 8; ++i) - env->fptags[i] = !((fpu.ftwx >> i) & 1); + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((fpu.ftwx >> i) & 1); + } memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs); env->mxcsr = fpu.mxcsr; @@ -894,8 +893,9 @@ static int kvm_get_xsave(CPUState *env) int ret, i; uint16_t cwd, swd, twd, fop; - if (!kvm_has_xsave()) + if (!kvm_has_xsave()) { return kvm_get_fpu(env); + } xsave = qemu_memalign(4096, sizeof(struct kvm_xsave)); ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave); @@ -911,8 +911,9 @@ static int kvm_get_xsave(CPUState *env) env->fpstt = (swd >> 11) & 7; env->fpus = swd; env->fpuc = cwd; - for (i = 0; i < 8; ++i) + 
for (i = 0; i < 8; ++i) { env->fptags[i] = !((twd >> i) & 1); + } env->mxcsr = xsave->region[XSAVE_MXCSR]; memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE], sizeof env->fpregs); @@ -934,19 +935,22 @@ static int kvm_get_xcrs(CPUState *env) int i, ret; struct kvm_xcrs xcrs; - if (!kvm_has_xcrs()) + if (!kvm_has_xcrs()) { return 0; + } ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs); - if (ret < 0) + if (ret < 0) { return ret; + } - for (i = 0; i < xcrs.nr_xcrs; i++) + for (i = 0; i < xcrs.nr_xcrs; i++) { /* Only support xcr0 now */ if (xcrs.xcrs[0].xcr == 0) { env->xcr0 = xcrs.xcrs[0].value; break; } + } return 0; #else return 0; @@ -960,8 +964,9 @@ static int kvm_get_sregs(CPUState *env) int bit, i, ret; ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); - if (ret < 0) + if (ret < 0) { return ret; + } /* There can only be one pending IRQ set in the bitmap at a time, so try to find it and save its number instead (-1 for none). */ @@ -999,21 +1004,19 @@ static int kvm_get_sregs(CPUState *env) env->efer = sregs.efer; //cpu_set_apic_tpr(env->apic_state, sregs.cr8); -#define HFLAG_COPY_MASK ~( \ - HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ - HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ - HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ - HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - - +#define HFLAG_COPY_MASK \ + ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ + HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & - (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << - (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + 
(HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); if (env->efer & MSR_EFER_LMA) { hflags |= HF_LMA_MASK; @@ -1023,19 +1026,16 @@ static int kvm_get_sregs(CPUState *env) hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; } else { hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_CS32_SHIFT); + (DESC_B_SHIFT - HF_CS32_SHIFT); hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> - (DESC_B_SHIFT - HF_SS32_SHIFT); - if (!(env->cr[0] & CR0_PE_MASK) || - (env->eflags & VM_MASK) || - !(hflags & HF_CS32_MASK)) { - hflags |= HF_ADDSEG_MASK; - } else { - hflags |= ((env->segs[R_DS].base | - env->segs[R_ES].base | - env->segs[R_SS].base) != 0) << - HF_ADDSEG_SHIFT; - } + (DESC_B_SHIFT - HF_SS32_SHIFT); + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) || + !(hflags & HF_CS32_MASK)) { + hflags |= HF_ADDSEG_MASK; + } else { + hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; + } } env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; @@ -1055,10 +1055,12 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_CS; msrs[n++].index = MSR_IA32_SYSENTER_ESP; msrs[n++].index = MSR_IA32_SYSENTER_EIP; - if (kvm_has_msr_star(env)) - msrs[n++].index = MSR_STAR; - if (kvm_has_msr_hsave_pa(env)) + if (has_msr_star) { + msrs[n++].index = MSR_STAR; + } + if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; + } msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -1070,23 +1072,27 @@ static int kvm_get_msrs(CPUState *env) #endif msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK; -#ifdef KVM_CAP_ASYNC_PF - msrs[n++].index = MSR_KVM_ASYNC_PF_EN; +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) + if (has_msr_async_pf_en) { + msrs[n++].index = MSR_KVM_ASYNC_PF_EN; + } #endif #ifdef KVM_CAP_MCE if (env->mcg_cap) { msrs[n++].index = MSR_MCG_STATUS; msrs[n++].index = MSR_MCG_CTL; - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) 
+ for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { msrs[n++].index = MSR_MC0_CTL + i; + } } #endif msr_data.info.nmsrs = n; ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data); - if (ret < 0) + if (ret < 0) { return ret; + } for (i = 0; i < ret; i++) { switch (msrs[i].index) { @@ -1144,7 +1150,7 @@ static int kvm_get_msrs(CPUState *env) } #endif break; -#ifdef KVM_CAP_ASYNC_PF +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ASYNC_PF) case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; break; @@ -1172,6 +1178,9 @@ static int kvm_get_mp_state(CPUState *env) return ret; } env->mp_state = mp_state.mp_state; + if (kvm_irqchip_in_kernel()) { + env->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); + } return 0; } @@ -1314,7 +1323,7 @@ static int kvm_get_debugregs(CPUState *env) ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs); if (ret < 0) { - return ret; + return ret; } for (i = 0; i < 4; i++) { env->dr[i] = dbgregs.db[i]; @@ -1333,44 +1342,44 @@ int kvm_arch_put_registers(CPUState *env, int level) assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 1); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_xsave(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_put_msrs(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; + } } - ret = kvm_put_vcpu_events(env, level); - if (ret < 0) + if (ret < 0) { return ret; - + } + ret = kvm_put_debugregs(env); + if (ret < 0) { + return ret; + } /* must be last */ ret = kvm_guest_debug_workarounds(env); - if (ret < 0) - return ret; - - ret = kvm_put_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } @@ -1381,42 +1390,49 @@ int kvm_arch_get_registers(CPUState *env) 
assert(cpu_is_stopped(env) || qemu_cpu_self(env)); ret = kvm_getput_regs(env, 0); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xsave(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_xcrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_sregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_msrs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_mp_state(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_vcpu_events(env); - if (ret < 0) + if (ret < 0) { return ret; - + } ret = kvm_get_debugregs(env); - if (ret < 0) + if (ret < 0) { return ret; - + } return 0; } int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { + /* Inject NMI */ + if (env->interrupt_request & CPU_INTERRUPT_NMI) { + env->interrupt_request &= ~CPU_INTERRUPT_NMI; + DPRINTF("injected NMI\n"); + kvm_vcpu_ioctl(env, KVM_NMI); + } + /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (env->interrupt_request & CPU_INTERRUPT_HARD) && @@ -1438,10 +1454,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) * interrupt, request an interrupt window exit. This will * cause a return to userspace as soon as the guest is ready to * receive interrupts. 
*/ - if ((env->interrupt_request & CPU_INTERRUPT_HARD)) + if ((env->interrupt_request & CPU_INTERRUPT_HARD)) { run->request_interrupt_window = 1; - else + } else { run->request_interrupt_window = 0; + } DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(env->apic_state); @@ -1451,11 +1468,11 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) int kvm_arch_post_run(CPUState *env, struct kvm_run *run) { - if (run->if_flag) + if (run->if_flag) { env->eflags |= IF_MASK; - else + } else { env->eflags &= ~IF_MASK; - + } cpu_set_apic_tpr(env->apic_state, run->cr8); cpu_set_apic_base(env->apic_state, run->apic_base); @@ -1491,8 +1508,19 @@ static int kvm_handle_halt(CPUState *env) return 1; } +static bool host_supports_vmx(void) +{ + uint32_t ecx, unused; + + host_cpuid(1, 0, &unused, &unused, &ecx, &unused); + return ecx & CPUID_EXT_VMX; +} + +#define VMX_INVALID_GUEST_STATE 0x80000021 + int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) { + uint64_t code; int ret = 0; switch (run->exit_reason) { @@ -1500,6 +1528,35 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) DPRINTF("handle_hlt\n"); ret = kvm_handle_halt(env); break; + case KVM_EXIT_SET_TPR: + ret = 1; + break; + case KVM_EXIT_FAIL_ENTRY: + code = run->fail_entry.hardware_entry_failure_reason; + fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", + code); + if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { + fprintf(stderr, + "\nIf you're runnning a guest on an Intel machine without " + "unrestricted mode\n" + "support, the failure can be most likely due to the guest " + "entering an invalid\n" + "state for Intel VT. For example, the guest maybe running " + "in big real mode\n" + "which is not supported on less recent Intel processors." 
+ "\n\n"); + } + ret = -1; + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", + run->ex.exception, run->ex.error_code); + ret = -1; + break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; + break; } return ret; @@ -1511,8 +1568,9 @@ int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) static const uint8_t int3 = 0xcc; if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) { return -EINVAL; + } return 0; } @@ -1521,8 +1579,9 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) uint8_t int3; if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc || - cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) + cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { return -EINVAL; + } return 0; } @@ -1538,10 +1597,12 @@ static int find_hw_breakpoint(target_ulong addr, int len, int type) { int n; - for (n = 0; n < nb_hw_breakpoint; n++) + for (n = 0; n < nb_hw_breakpoint; n++) { if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && - (hw_breakpoint[n].len == len || len == -1)) + (hw_breakpoint[n].len == len || len == -1)) { return n; + } + } return -1; } @@ -1560,8 +1621,9 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, case 2: case 4: case 8: - if (addr & (len - 1)) + if (addr & (len - 1)) { return -EINVAL; + } break; default: return -EINVAL; @@ -1571,12 +1633,12 @@ int kvm_arch_insert_hw_breakpoint(target_ulong addr, return -ENOSYS; } - if (nb_hw_breakpoint == 4) + if (nb_hw_breakpoint == 4) { return -ENOBUFS; - - if (find_hw_breakpoint(addr, len, type) >= 0) + } + if (find_hw_breakpoint(addr, len, type) >= 0) { return -EEXIST; - + } hw_breakpoint[nb_hw_breakpoint].addr = addr; hw_breakpoint[nb_hw_breakpoint].len = len; 
hw_breakpoint[nb_hw_breakpoint].type = type; @@ -1591,9 +1653,9 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr, int n; n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type); - if (n < 0) + if (n < 0) { return -ENOENT; - + } nb_hw_breakpoint--; hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; @@ -1614,11 +1676,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) if (arch_info->exception == 1) { if (arch_info->dr6 & (1 << 14)) { - if (cpu_single_env->singlestep_enabled) + if (cpu_single_env->singlestep_enabled) { handle = 1; + } } else { - for (n = 0; n < 4; n++) - if (arch_info->dr6 & (1 << n)) + for (n = 0; n < 4; n++) { + if (arch_info->dr6 & (1 << n)) { switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { case 0x0: handle = 1; @@ -1636,10 +1699,12 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) hw_watchpoint.flags = BP_MEM_ACCESS; break; } + } + } } - } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) + } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) { handle = 1; - + } if (!handle) { cpu_synchronize_state(cpu_single_env); assert(cpu_single_env->exception_injected == -1); @@ -1663,9 +1728,9 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) }; int n; - if (kvm_sw_breakpoints_active(env)) + if (kvm_sw_breakpoints_active(env)) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - + } if (nb_hw_breakpoint > 0) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; dbg->arch.debugreg[7] = 0x0600; @@ -1673,18 +1738,16 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) dbg->arch.debugreg[n] = hw_breakpoint[n].addr; dbg->arch.debugreg[7] |= (2 << (n * 2)) | (type_code[hw_breakpoint[n].type] << (16 + n*4)) | - (len_code[hw_breakpoint[n].len] << (18 + n*4)); + ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); } } - /* Legal xcr0 for loading */ - env->xcr0 = 1; } #endif /* KVM_CAP_SET_GUEST_DEBUG */ bool 
kvm_arch_stop_on_emulation_error(CPUState *env) { - return !(env->cr[0] & CR0_PE_MASK) || - ((env->segs[R_CS].selector & 3) != 3); + return !(env->cr[0] & CR0_PE_MASK) || + ((env->segs[R_CS].selector & 3) != 3); } static void hardware_memory_error(void) @@ -1696,64 +1759,96 @@ static void hardware_memory_error(void) #ifdef KVM_CAP_MCE static void kvm_mce_broadcast_rest(CPUState *env) { + struct kvm_x86_mce mce = { + .bank = 1, + .status = MCI_STATUS_VAL | MCI_STATUS_UC, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = 0, + .misc = 0, + }; CPUState *cenv; - int family, model, cpuver = env->cpuid_version; - - family = (cpuver >> 8) & 0xf; - model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0xf); /* Broadcast MCA signal for processor version 06H_EH and above */ - if ((family == 6 && model >= 14) || family > 6) { + if (cpu_x86_support_mca_broadcast(env)) { for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { if (cenv == env) { continue; } - kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); + kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR); } } } + +static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | MCI_STATUS_AR | 0x134, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + int r; + + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = paddr, 
+ .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + int r; + + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr) +{ + struct kvm_x86_mce mce = { + .bank = 9, + .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, + .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, + .addr = paddr, + .misc = (MCM_ADDR_PHYS << 6) | 0xc, + }; + + kvm_inject_x86_mce_on(env, &mce, ABORT_ON_ERROR); + kvm_mce_broadcast_rest(env); +} + #endif int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) { #if defined(KVM_CAP_MCE) - struct kvm_x86_mce mce = { - .bank = 9, - }; void *vaddr; ram_addr_t ram_addr; target_phys_addr_t paddr; - int r; if ((env->mcg_cap & MCG_SER_P) && addr && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) { - if (code == BUS_MCEERR_AR) { - /* Fake an Intel architectural Data Load SRAR UCR */ - mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | MCI_STATUS_AR | 0x134; - mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; - mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; - } else { - /* - * If there is an MCE excpetion being processed, ignore - * this SRAO MCE - */ - r = kvm_mce_in_exception(env); - if (r == -1) { - fprintf(stderr, "Failed to get MCE status\n"); - } else if (r) { - return 0; - } - /* Fake an Intel architectural Memory scrubbing UCR */ - mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0; - mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; - mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; - } vaddr = (void *)addr; if (qemu_ram_addr_from_host(vaddr, &ram_addr) || !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) { @@ -1766,13 +1861,20 @@ int kvm_on_sigbus_vcpu(CPUState 
*env, int code, void *addr) hardware_memory_error(); } } - mce.addr = paddr; - r = kvm_set_mce(env, &mce); - if (r < 0) { - fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); - abort(); + + if (code == BUS_MCEERR_AR) { + /* Fake an Intel architectural Data Load SRAR UCR */ + kvm_mce_inj_srar_dataload(env, paddr); + } else { + /* + * If there is an MCE excpetion being processed, ignore + * this SRAO MCE + */ + if (!kvm_mce_in_progress(env)) { + /* Fake an Intel architectural Memory scrubbing UCR */ + kvm_mce_inj_srao_memscrub(env, paddr); + } } - kvm_mce_broadcast_rest(env); } else #endif { @@ -1791,7 +1893,6 @@ int kvm_on_sigbus(int code, void *addr) { #if defined(KVM_CAP_MCE) if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { - uint64_t status; void *vaddr; ram_addr_t ram_addr; target_phys_addr_t paddr; @@ -1804,13 +1905,7 @@ int kvm_on_sigbus(int code, void *addr) "QEMU itself instead of guest system!: %p\n", addr); return 0; } - status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0; - kvm_inject_x86_mce(first_cpu, 9, status, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, 1); - kvm_mce_broadcast_rest(first_cpu); + kvm_mce_inj_srao_memscrub2(first_cpu, paddr); } else #endif { diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h index 04932cf4c8..9d7b584267 100644 --- a/target-i386/kvm_x86.h +++ b/target-i386/kvm_x86.h @@ -15,8 +15,11 @@ #ifndef __KVM_X86_H__ #define __KVM_X86_H__ +#define ABORT_ON_ERROR 0x01 +#define MCE_BROADCAST 0x02 + void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, - int abort_on_error); + int flag); #endif |