From 75b208c28316095c4685e8596ceb9e3f656592e2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Mon, 23 Nov 2020 07:17:47 -0500 Subject: target/i386: fix operand order for PDEP and PEXT For PDEP and PEXT, the mask is provided in the memory (mod+r/m) operand, and therefore is loaded in s->T0 by gen_ldst_modrm. The source is provided in the second source operand (VEX.vvvv) and therefore is loaded in s->T1. Fix the order in which they are passed to the helpers. Reported-by: Lenard Szolnoki <blog@lenardszolnoki.com> Analyzed-by: Lenard Szolnoki <blog@lenardszolnoki.com> Fixes: https://bugs.launchpad.net/qemu/+bug/1605123 Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'target') diff --git a/target/i386/translate.c b/target/i386/translate.c index 4c57307e42..e8f5f5803a 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -3936,14 +3936,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we + /* Note that by zero-extending the source operand, we automatically handle zero-extending the result. */ if (ot == MO_64) { tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pdep(cpu_regs[reg], s->T0, s->T1); + gen_helper_pdep(cpu_regs[reg], s->T1, s->T0); break; case 0x2f5: /* pext Gy, By, Ey */ @@ -3954,14 +3954,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we + /* Note that by zero-extending the source operand, we automatically handle zero-extending the result. */ if (ot == MO_64) { tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); } else { tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); } - gen_helper_pext(cpu_regs[reg], s->T0, s->T1); + gen_helper_pext(cpu_regs[reg], s->T1, s->T0); break; case 0x1f6: /* adcx Gy, Ey */ -- cgit v1.2.3 From c1bb5418e32ec70c72af332354b5963eab7a5579 Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw2@infradead.org> Date: Mon, 5 Oct 2020 15:18:19 +0100 Subject: target/i386: Support up to 32768 CPUs without IRQ remapping The IOAPIC has an 'Extended Destination ID' field in its RTE, which maps to bits 11-4 of the MSI address. Since those address bits fall within a given 4KiB page they were historically non-trivial to use on real hardware. The Intel IOMMU uses the lowest bit to indicate a remappable format MSI, and then the remaining 7 bits are part of the index. Where the remappable format bit isn't set, we can actually use the other seven to allow external (IOAPIC and MSI) interrupts to reach up to 32768 CPUs instead of just the 255 permitted on bare metal. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Message-Id: <78097f9218300e63e751e077a0a5ca029b56ba46.camel@infradead.org> [Fix UBSAN warning. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: David Woodhouse <dwmw2@infradead.org> --- target/i386/cpu.c | 5 +++- target/i386/kvm.c | 77 +++++++++++++++++++++++++++++++++++++------------- target/i386/kvm_i386.h | 2 ++ 3 files changed, 64 insertions(+), 20 deletions(-) (limited to 'target') diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5a8c96072e..b90ed05897 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -799,7 +799,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi", - "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", NULL, + "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", "kvm-msi-ext-dest-id", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "kvmclock-stable-bit", NULL, NULL, NULL, @@ -4114,6 +4114,7 @@ static PropValue kvm_default_props[] = { { "kvm-pv-eoi", "on" }, { "kvmclock-stable-bit", "on" }, { "x2apic", "on" }, + { "kvm-msi-ext-dest-id", "off" }, { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -5140,6 +5141,8 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) if (kvm_enabled()) { if (!kvm_irqchip_in_kernel()) { x86_cpu_change_kvm_default("x2apic", "off"); + } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) { + x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on"); } x86_cpu_apply_props(cpu, kvm_default_props); diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a2934dda02..bcfa4b03e0 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -416,6 +416,9 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (!kvm_irqchip_in_kernel()) { ret &= ~(1U << KVM_FEATURE_PV_UNHALT); } + if (kvm_irqchip_is_split()) { + ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID; + } } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { ret |= 1U << KVM_HINTS_REALTIME; } @@ -4589,38 +4592,74 @@ int kvm_arch_irqchip_create(KVMState *s) } } +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address) +{ + CPUX86State *env; + uint64_t ext_id; + + if (!first_cpu) { + return address; + } + env = &X86_CPU(first_cpu)->env; + if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) { + return address; + } + + /* + * If the remappable format bit is set, or the upper bits are + * already set in address_hi, or the low extended bits aren't + * there anyway, do nothing. + */ + ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT); + if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) { + return address; + } + + address &= ~ext_id; + address |= ext_id << 35; + return address; +} + int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, uint64_t address, uint32_t data, PCIDevice *dev) { X86IOMMUState *iommu = x86_iommu_get_default(); if (iommu) { - int ret; - MSIMessage src, dst; X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu); - if (!class->int_remap) { - return 0; - } + if (class->int_remap) { + int ret; + MSIMessage src, dst; - src.address = route->u.msi.address_hi; - src.address <<= VTD_MSI_ADDR_HI_SHIFT; - src.address |= route->u.msi.address_lo; - src.data = route->u.msi.data; + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; - ret = class->int_remap(iommu, &src, &dst, dev ? \ - pci_requester_id(dev) : \ - X86_IOMMU_SID_INVALID); - if (ret) { - trace_kvm_x86_fixup_msi_error(route->gsi); - return 1; - } + ret = class->int_remap(iommu, &src, &dst, dev ? \ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + /* + * Handled untranslated compatibilty format interrupt with + * extended destination ID in the low bits 11-5. */ + dst.address = kvm_swizzle_msi_ext_dest_id(dst.address); - route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; - route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; - route->u.msi.data = dst.data; + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + return 0; + } } + address = kvm_swizzle_msi_ext_dest_id(address); + route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK; return 0; } diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h index a4a619cebb..dc72508389 100644 --- a/target/i386/kvm_i386.h +++ b/target/i386/kvm_i386.h @@ -48,4 +48,6 @@ bool kvm_has_waitpkg(void); bool kvm_hv_vpindex_settable(void); +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); + #endif -- cgit v1.2.3 From c2ba0515f2df58a661fcb5d6485139877d92ab1b Mon Sep 17 00:00:00 2001 From: Bin Meng <bin.meng@windriver.com> Date: Fri, 13 Nov 2020 17:56:18 +0800 Subject: target/i386: seg_helper: Correct segment selector nullification in the RET/IRET helper Per the SDM, when returning to outer privilege level, for segment registers (ES, FS, GS, and DS) if the check fails, the segment selector becomes null, but QEMU clears the base/limit/flags as well as nullifying the segment selector, which should be a spec violation. Real hardware seems to be compliant with the spec, at least on one Coffee Lake board I tested. Signed-off-by: Bin Meng <bin.meng@windriver.com> Message-Id: <1605261378-77971-1-git-send-email-bmeng.cn@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/seg_helper.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'target') diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c index 09b6554660..e6ffa1f018 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/seg_helper.c @@ -2108,7 +2108,10 @@ static inline void validate_seg(CPUX86State *env, int seg_reg, int cpl) if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { /* data or non conforming code segment */ if (dpl < cpl) { - cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, seg_reg, 0, + env->segs[seg_reg].base, + env->segs[seg_reg].limit, + env->segs[seg_reg].flags & ~DESC_P_MASK); } } } -- cgit v1.2.3 From faf20793b5af15ed4bea9c40dd8e6ae46d51be23 Mon Sep 17 00:00:00 2001 From: Sunil Muthuswamy <sunilmut@microsoft.com> Date: Wed, 28 Oct 2020 02:23:19 +0000 Subject: WHPX: support for the kernel-irqchip on/off This patch adds support the kernel-irqchip option for WHPX with on or off value. 'split' value is not supported for the option. The option only works for the latest version of Windows (ones that are coming out on Insiders). The change maintains backward compatibility on older version of Windows where this option is not supported. Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com> Message-Id: <SN4PR2101MB0880B13258DA9251F8459F4DC0170@SN4PR2101MB0880.namprd21.prod.outlook.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 3 + target/i386/meson.build | 1 + target/i386/whp-dispatch.h | 9 +- target/i386/whpx-all.c | 291 +++++++++++++++++++++++++++++++++++++++------ target/i386/whpx-apic.c | 274 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 543 insertions(+), 35 deletions(-) create mode 100644 target/i386/whpx-apic.c (limited to 'target') diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b90ed05897..af48e3f79e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -30,6 +30,7 @@ #include "sysemu/hvf.h" #include "sysemu/cpus.h" #include "sysemu/xen.h" +#include "sysemu/whpx.h" #include "kvm_i386.h" #include "sev_i386.h" @@ -6148,6 +6149,8 @@ APICCommonClass *apic_get_class(void) apic_type = "kvm-apic"; } else if (xen_enabled()) { apic_type = "xen-apic"; + } else if (whpx_apic_in_platform()) { + apic_type = "whpx-apic"; } return APIC_COMMON_CLASS(object_class_by_name(apic_type)); diff --git a/target/i386/meson.build b/target/i386/meson.build index a1a02f3e99..fc3ee80386 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -33,6 +33,7 @@ i386_softmmu_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c')) i386_softmmu_ss.add(when: 'CONFIG_WHPX', if_true: files( 'whpx-all.c', 'whpx-cpus.c', + 'whpx-apic.c', )) i386_softmmu_ss.add(when: 'CONFIG_HAX', if_true: files( 'hax-all.c', diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h index b18aba20ed..cef5d848bd 100644 --- a/target/i386/whp-dispatch.h +++ b/target/i386/whp-dispatch.h @@ -30,6 +30,14 @@ */ #define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvRequestInterrupt, (WHV_PARTITION_HANDLE Partition, \ + WHV_INTERRUPT_CONTROL* Interrupt, UINT32 InterruptControlSize)) \ + X(HRESULT, WHvGetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize, UINT32* WrittenSize)) \ + X(HRESULT, WHvSetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize)) \ #define LIST_WINHVEMULATION_FUNCTIONS(X) \ X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ @@ -37,7 +45,6 @@ X(HRESULT, WHvEmulatorTryIoEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_X64_IO_PORT_ACCESS_CONTEXT* IoInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ X(HRESULT, WHvEmulatorTryMmioEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_MEMORY_ACCESS_CONTEXT* MmioInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ - #define WHP_DEFINE_TYPE(return_type, function_name, signature) \ typedef return_type (WINAPI *function_name ## _t) signature; diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c index f4f3e33eac..3b824fc9d7 100644 --- a/target/i386/whpx-all.c +++ b/target/i386/whpx-all.c @@ -19,10 +19,15 @@ #include "sysemu/runstate.h" #include "qemu/main-loop.h" #include "hw/boards.h" +#include "hw/i386/ioapic.h" +#include "hw/i386/apic_internal.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qapi/qapi-types-common.h" +#include "qapi/qapi-visit-common.h" #include "migration/blocker.h" #include "whp-dispatch.h" +#include <winerror.h> #include "whpx-cpus.h" @@ -31,11 +36,6 @@ #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) -struct whpx_state { - uint64_t mem_quota; - WHV_PARTITION_HANDLE partition; -}; - static const WHV_REGISTER_NAME whpx_register_names[] = { /* X64 General purpose registers */ @@ -152,6 +152,7 @@ struct whpx_vcpu { WHV_EMULATOR_HANDLE emulator; bool window_registered; bool interruptable; + bool ready_for_pic_interrupt; uint64_t tpr; uint64_t apic_base; bool interruption_pending; @@ -163,7 +164,7 @@ struct whpx_vcpu { static bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; - +static uint32_t max_vcpu_index; struct whpx_state whpx_global; struct WHPDispatch whp_dispatch; @@ -599,6 +600,10 @@ static void whpx_get_registers(CPUState *cpu) assert(idx == RTL_NUMBER_OF(whpx_register_names)); + if (whpx_apic_in_platform()) { + whpx_apic_get(x86_cpu->apic_state); + } + return; } @@ -820,26 +825,42 @@ static void whpx_vcpu_pre_run(CPUState *cpu) } /* Get pending hard interruption or replay one that was overwritten */ - if (!vcpu->interruption_pending && - vcpu->interruptable && (env->eflags & IF_MASK)) { - assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - new_int.InterruptionType = WHvX64PendingInterrupt; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = irq; + if (!whpx_apic_in_platform()) { + if (!vcpu->interruption_pending && + vcpu->interruptable && (env->eflags & IF_MASK)) { + assert(!new_int.InterruptionPending); + if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + new_int.InterruptionType = WHvX64PendingInterrupt; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = irq; + } } } - } - /* Setup interrupt state if new one was prepared */ - if (new_int.InterruptionPending) { - reg_values[reg_count].PendingInterruption = new_int; - reg_names[reg_count] = WHvRegisterPendingInterruption; - reg_count += 1; - } + /* Setup interrupt state if new one was prepared */ + if (new_int.InterruptionPending) { + reg_values[reg_count].PendingInterruption = new_int; + reg_names[reg_count] = WHvRegisterPendingInterruption; + reg_count += 1; + } + } else if (vcpu->ready_for_pic_interrupt && + (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + reg_names[reg_count] = WHvRegisterPendingEvent; + reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT) + { + .EventPending = 1, + .EventType = WHvX64PendingEventExtInt, + .Vector = irq, + }; + reg_count += 1; + } + } /* Sync the TPR to the CR8 if was modified during the intercept */ tpr = cpu_get_apic_tpr(x86_cpu->apic_state); @@ -854,14 +875,17 @@ static void whpx_vcpu_pre_run(CPUState *cpu) /* Update the state of the interrupt delivery notification */ if (!vcpu->window_registered && cpu->interrupt_request & CPU_INTERRUPT_HARD) { - reg_values[reg_count].DeliverabilityNotifications.InterruptNotification - = 1; + reg_values[reg_count].DeliverabilityNotifications = + (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { + .InterruptNotification = 1 + }; vcpu->window_registered = 1; reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; reg_count += 1; } qemu_mutex_unlock_iothread(); + vcpu->ready_for_pic_interrupt = false; if (reg_count) { hr = whp_dispatch.WHvSetVirtualProcessorRegisters( @@ -948,7 +972,7 @@ static int whpx_vcpu_run(CPUState *cpu) int ret; whpx_vcpu_process_async_events(cpu); - if (cpu->halted) { + if (cpu->halted && !whpx_apic_in_platform()) { cpu->exception_index = EXCP_HLT; qatomic_set(&cpu->exit_request, false); return 0; @@ -992,14 +1016,114 @@ static int whpx_vcpu_run(CPUState *cpu) break; case WHvRunVpExitReasonX64InterruptWindow: + vcpu->ready_for_pic_interrupt = 1; vcpu->window_registered = 0; ret = 0; break; + case WHvRunVpExitReasonX64ApicEoi: + assert(whpx_apic_in_platform()); + ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector); + break; + case WHvRunVpExitReasonX64Halt: ret = whpx_handle_halt(cpu); break; + case WHvRunVpExitReasonX64ApicInitSipiTrap: { + WHV_INTERRUPT_CONTROL ipi = {0}; + uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr; + uint32_t delivery_mode = + (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT; + int dest_shorthand = + (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; + bool broadcast = false; + bool include_self = false; + uint32_t i; + + /* We only registered for INIT and SIPI exits. */ + if ((delivery_mode != APIC_DM_INIT) && + (delivery_mode != APIC_DM_SIPI)) { + error_report( + "WHPX: Unexpected APIC exit that is not a INIT or SIPI"); + break; + } + + if (delivery_mode == APIC_DM_INIT) { + ipi.Type = WHvX64InterruptTypeInit; + } else { + ipi.Type = WHvX64InterruptTypeSipi; + } + + ipi.DestinationMode = + ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical; + + ipi.TriggerMode = + ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ? + WHvX64InterruptTriggerModeLevel : + WHvX64InterruptTriggerModeEdge; + + ipi.Vector = icr & APIC_VECTOR_MASK; + switch (dest_shorthand) { + /* no shorthand. Bits 56-63 contain the destination. */ + case 0: + ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report("WHPX: Failed to request interrupt hr=%08lx", + hr); + } + + break; + + /* self */ + case 1: + include_self = true; + break; + + /* broadcast, including self */ + case 2: + broadcast = true; + include_self = true; + break; + + /* broadcast, excluding self */ + case 3: + broadcast = true; + break; + } + + if (!broadcast && !include_self) { + break; + } + + for (i = 0; i <= max_vcpu_index; i++) { + if (i == cpu->cpu_index && !include_self) { + continue; + } + + /* + * Assuming that APIC Ids are identity mapped since + * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers + * are not handled yet and the hypervisor doesn't allow the + * guest to modify the APIC ID. + */ + ipi.Destination = i; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report( + "WHPX: Failed to request SIPI for %d, hr=%08lx", + i, hr); + } + } + + break; + } + case WHvRunVpExitReasonCanceled: cpu->exception_index = EXCP_INTERRUPT; ret = 1; @@ -1314,6 +1438,7 @@ int whpx_init_vcpu(CPUState *cpu) vcpu->interruptable = true; cpu->vcpu_dirty = true; cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; + max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); return 0; @@ -1549,6 +1674,43 @@ error: return false; } +static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + struct whpx_state *whpx = &whpx_global; + OnOffSplit mode; + + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { + return; + } + + switch (mode) { + case ON_OFF_SPLIT_ON: + whpx->kernel_irqchip_allowed = true; + whpx->kernel_irqchip_required = true; + break; + + case ON_OFF_SPLIT_OFF: + whpx->kernel_irqchip_allowed = false; + whpx->kernel_irqchip_required = false; + break; + + case ON_OFF_SPLIT_SPLIT: + error_setg(errp, "WHPX: split irqchip currently not supported"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=on|off"); + break; + + default: + /* + * The value was checked in visit_type_OnOffSplit() above. If + * we get here, then something is wrong in QEMU. + */ + abort(); + } +} + /* * Partition support */ @@ -1562,6 +1724,7 @@ static int whpx_accel_init(MachineState *ms) UINT32 whpx_cap_size; WHV_PARTITION_PROPERTY prop; UINT32 cpuidExitList[] = {1, 0x80000001}; + WHV_CAPABILITY_FEATURES features = {0}; whpx = &whpx_global; @@ -1570,7 +1733,6 @@ static int whpx_accel_init(MachineState *ms) goto error; } - memset(whpx, 0, sizeof(struct whpx_state)); whpx->mem_quota = ms->ram_size; hr = whp_dispatch.WHvGetCapability( @@ -1582,6 +1744,14 @@ static int whpx_accel_init(MachineState *ms) goto error; } + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); + if (FAILED(hr)) { + error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + hr = whp_dispatch.WHvCreatePartition(&whpx->partition); if (FAILED(hr)) { error_report("WHPX: Failed to create partition, hr=%08lx", hr); @@ -1604,18 +1774,55 @@ static int whpx_accel_init(MachineState *ms) goto error; } + /* + * Error out if WHP doesn't support apic emulation and user is requiring + * it. + */ + if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || + !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { + error_report("WHPX: kernel irqchip requested, but unavailable. " + "Try without kernel-irqchip or with kernel-irqchip=off"); + ret = -EINVAL; + goto error; + } + + if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && + whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { + WHV_X64_LOCAL_APIC_EMULATION_MODE mode = + WHvX64LocalApicEmulationModeXApic; + printf("WHPX: setting APIC emulation mode in the hypervisor\n"); + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &mode, + sizeof(mode)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); + if (whpx->kernel_irqchip_required) { + error_report("WHPX: kernel irqchip requested, but unavailable"); + ret = -EINVAL; + goto error; + } + } else { + whpx->apic_in_platform = true; + } + } + + /* Register for MSR and CPUID exits */ memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); prop.ExtendedVmExits.X64MsrExit = 1; prop.ExtendedVmExits.X64CpuidExit = 1; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeExtendedVmExits, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); + if (whpx_apic_in_platform()) { + prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; + } + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeExtendedVmExits, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); if (FAILED(hr)) { - error_report("WHPX: Failed to enable partition extended X64MsrExit and" - " X64CpuidExit hr=%08lx", hr); + error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); ret = -EINVAL; goto error; } @@ -1668,11 +1875,27 @@ static void whpx_accel_class_init(ObjectClass *oc, void *data) ac->name = "WHPX"; ac->init_machine = whpx_accel_init; ac->allowed = &whpx_allowed; + + object_class_property_add(oc, "kernel-irqchip", "on|off|split", + NULL, whpx_set_kernel_irqchip, + NULL, NULL); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure WHPX in-kernel irqchip"); +} + +static void whpx_accel_instance_init(Object *obj) +{ + struct whpx_state *whpx = &whpx_global; + + memset(whpx, 0, sizeof(struct whpx_state)); + /* Turn on kernel-irqchip, by default */ + whpx->kernel_irqchip_allowed = true; } static const TypeInfo whpx_accel_type = { .name = ACCEL_CLASS_NAME("whpx"), .parent = TYPE_ACCEL, + .instance_init = whpx_accel_instance_init, .class_init = whpx_accel_class_init, }; diff --git a/target/i386/whpx-apic.c b/target/i386/whpx-apic.c new file mode 100644 index 0000000000..b127a3cb8a --- /dev/null +++ b/target/i386/whpx-apic.c @@ -0,0 +1,274 @@ +/* + * WHPX platform APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * John Starks <jostarks@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/pci/msi.h" +#include "sysemu/hw_accel.h" +#include "sysemu/whpx.h" +#include "whp-dispatch.h" + +static void whpx_put_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i; + + memset(kapic, 0, sizeof(*kapic)); + kapic->fields[0x2].data = s->id << 24; + kapic->fields[0x3].data = s->version | ((APIC_LVT_NB - 1) << 16); + kapic->fields[0x8].data = s->tpr; + kapic->fields[0xd].data = s->log_dest << 24; + kapic->fields[0xe].data = s->dest_mode << 28 | 0x0fffffff; + kapic->fields[0xf].data = s->spurious_vec; + for (i = 0; i < 8; i++) { + kapic->fields[0x10 + i].data = s->isr[i]; + kapic->fields[0x18 + i].data = s->tmr[i]; + kapic->fields[0x20 + i].data = s->irr[i]; + } + + kapic->fields[0x28].data = s->esr; + kapic->fields[0x30].data = s->icr[0]; + kapic->fields[0x31].data = s->icr[1]; + for (i = 0; i < APIC_LVT_NB; i++) { + kapic->fields[0x32 + i].data = s->lvt[i]; + } + + kapic->fields[0x38].data = s->initial_count; + kapic->fields[0x3e].data = s->divide_conf; +} + +static void whpx_get_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i, v; + + s->id = kapic->fields[0x2].data >> 24; + s->tpr = kapic->fields[0x8].data; + s->arb_id = kapic->fields[0x9].data; + s->log_dest = kapic->fields[0xd].data >> 24; + s->dest_mode = kapic->fields[0xe].data >> 28; + s->spurious_vec = kapic->fields[0xf].data; + for (i = 0; i < 8; i++) { + s->isr[i] = kapic->fields[0x10 + i].data; + s->tmr[i] = kapic->fields[0x18 + i].data; + s->irr[i] = kapic->fields[0x20 + i].data; + } + + s->esr = kapic->fields[0x28].data; + s->icr[0] = kapic->fields[0x30].data; + s->icr[1] = kapic->fields[0x31].data; + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kapic->fields[0x32 + i].data; + } + + s->initial_count = kapic->fields[0x38].data; + s->divide_conf = kapic->fields[0x3e].data; + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static void whpx_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void whpx_put_apic_base(CPUState *cpu, uint64_t val) +{ + HRESULT hr; + WHV_REGISTER_VALUE reg_value = {.Reg64 = val}; + WHV_REGISTER_NAME reg_name = WHvX64RegisterApicBase; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx_global.partition, + cpu->cpu_index, + ®_name, 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MSR APIC base, hr=%08lx", hr); + } +} + +static void whpx_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = val; +} + +static uint8_t whpx_apic_get_tpr(APICCommonState *s) +{ + return s->tpr; +} + +static void whpx_apic_vapic_base_update(APICCommonState *s) +{ + /* not implemented yet */ +} + +static void whpx_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct whpx_lapic_state kapic; + HRESULT hr; + + whpx_put_apic_base(CPU(s->cpu), s->apicbase); + whpx_put_apic_state(s, &kapic); + + hr = whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cs->cpu_index, + &kapic, + sizeof(kapic)); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } +} + +void whpx_apic_get(DeviceState *dev) +{ + APICCommonState *s = APIC_COMMON(dev); + CPUState *cpu = CPU(s->cpu); + struct whpx_lapic_state kapic; + + HRESULT hr = whp_dispatch.WHvGetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cpu->cpu_index, + &kapic, + sizeof(kapic), + NULL); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } + + whpx_get_apic_state(s, &kapic); +} + +static void whpx_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_external_nmi(APICCommonState *s) +{ +} + +static void whpx_send_msi(MSIMessage *msg) +{ + uint64_t addr = msg->address; + uint32_t data = msg->data; + uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; + + WHV_INTERRUPT_CONTROL interrupt = { + /* Values correspond to delivery modes */ + .Type = delivery, + .DestinationMode = dest_mode ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical, + + .TriggerMode = trigger_mode ? + WHvX64InterruptTriggerModeLevel : WHvX64InterruptTriggerModeEdge, + .Reserved = 0, + .Vector = vector, + .Destination = dest, + }; + HRESULT hr = whp_dispatch.WHvRequestInterrupt(whpx_global.partition, + &interrupt, sizeof(interrupt)); + if (FAILED(hr)) { + fprintf(stderr, "whpx: injection failed, MSI (%llx, %x) delivery: %d, " + "dest_mode: %d, trigger mode: %d, vector: %d, lost (%08lx)\n", + addr, data, delivery, dest_mode, trigger_mode, vector, hr); + } +} + +static uint64_t whpx_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void whpx_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + whpx_send_msi(&msg); +} + +static const MemoryRegionOps whpx_apic_io_ops = { + .read = whpx_apic_mem_read, + .write = whpx_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void whpx_apic_reset(APICCommonState *s) +{ + /* Not used by WHPX. */ + s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(s), &whpx_apic_io_ops, s, + "whpx-apic-msi", APIC_SPACE_SIZE); + + msi_nonbroken = true; +} + +static void whpx_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = whpx_apic_realize; + k->reset = whpx_apic_reset; + k->set_base = whpx_apic_set_base; + k->set_tpr = whpx_apic_set_tpr; + k->get_tpr = whpx_apic_get_tpr; + k->post_load = whpx_apic_post_load; + k->vapic_base_update = whpx_apic_vapic_base_update; + k->external_nmi = whpx_apic_external_nmi; + k->send_msi = whpx_send_msi; +} + +static const TypeInfo whpx_apic_info = { + .name = "whpx-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = whpx_apic_class_init, +}; + +static void whpx_apic_register_types(void) +{ + type_register_static(&whpx_apic_info); +} + +type_init(whpx_apic_register_types) -- cgit v1.2.3 From 6e504a989dece8136d58f9f7c42f6e22b1ce49ae Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Wed, 28 Oct 2020 06:18:20 -0400 Subject: arm: do not use ram_size global Use the machine properties instead. Cc: qemu-ppc@nongnu.org Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/arm/arm-semi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'target') diff --git a/target/arm/arm-semi.c b/target/arm/arm-semi.c index c892e0e674..f7b7bff522 100644 --- a/target/arm/arm-semi.c +++ b/target/arm/arm-semi.c @@ -37,6 +37,7 @@ #include "exec/gdbstub.h" #include "qemu/cutils.h" #include "hw/arm/boot.h" +#include "hw/boards.h" #endif #define TARGET_SYS_OPEN 0x01 @@ -1048,7 +1049,7 @@ target_ulong do_arm_semihosting(CPUARMState *env) retvals[2] = ts->stack_base; retvals[3] = 0; /* Stack limit. */ #else - limit = ram_size; + limit = current_machine->ram_size; /* TODO: Make this use the limit of the loaded application. */ retvals[0] = rambase + limit / 2; retvals[1] = rambase + limit; -- cgit v1.2.3 From 5601d24164551b3473588837c4f92ef1bbde0415 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Wed, 28 Oct 2020 06:15:40 -0400 Subject: m68k: do not use ram_size global Use the machine properties instead. Cc: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/m68k/m68k-semi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/m68k/m68k-semi.c b/target/m68k/m68k-semi.c index 8e5fbfc8fa..27600e0cc0 100644 --- a/target/m68k/m68k-semi.c +++ b/target/m68k/m68k-semi.c @@ -26,6 +26,7 @@ #else #include "exec/gdbstub.h" #include "exec/softmmu-semi.h" +#include "hw/boards.h" #endif #include "qemu/log.h" @@ -455,8 +456,8 @@ void do_m68k_semihosting(CPUM68KState *env, int nr) * FIXME: This is wrong for boards where RAM does not start at * address zero. */ - env->dregs[1] = ram_size; - env->aregs[7] = ram_size; + env->dregs[1] = current_machine->ram_size; + env->aregs[7] = current_machine->ram_size; #endif return; default: -- cgit v1.2.3 From 382a04afa075927d843f11e9fb8c450a084bbfa8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Wed, 28 Oct 2020 06:17:07 -0400 Subject: s390x: do not use ram_size global Use the machine properties instead. Cc: Cornelia Huck <cohuck@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/s390x/excp_helper.c | 3 ++- target/s390x/mem_helper.c | 10 +++++++--- target/s390x/mmu_helper.c | 4 +++- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'target') diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c index 0adfbbda27..9cf66d3690 100644 --- a/target/s390x/excp_helper.c +++ b/target/s390x/excp_helper.c @@ -156,9 +156,10 @@ bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, !address_space_access_valid(&address_space_memory, raddr, TARGET_PAGE_SIZE, access_type, MEMTXATTRS_UNSPECIFIED)) { + MachineState *ms = MACHINE(qdev_get_machine()); qemu_log_mask(CPU_LOG_MMU, "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", - __func__, (uint64_t)raddr, (uint64_t)ram_size); + __func__, (uint64_t)raddr, (uint64_t)ms->ram_size); excp = PGM_ADDRESSING; tec = 0; /* unused */ } diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index bd25eed3e8..0108611cc9 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -31,6 +31,7 @@ #if !defined(CONFIG_USER_ONLY) #include "hw/s390x/storage-keys.h" +#include "hw/boards.h" #endif /*****************************************************************************/ @@ -2075,12 +2076,13 @@ uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) /* insert storage key extended */ uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2) { + MachineState *ms = MACHINE(qdev_get_machine()); static S390SKeysState *ss; static S390SKeysClass *skeyclass; uint64_t addr = wrap_address(env, r2); uint8_t key; - if (addr > ram_size) { + if (addr > ms->ram_size) { return 0; } @@ -2098,12 +2100,13 @@ uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2) /* set storage key extended */ void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2) { + MachineState *ms = MACHINE(qdev_get_machine()); static S390SKeysState *ss; static S390SKeysClass *skeyclass; uint64_t addr = wrap_address(env, r2); uint8_t key; - if (addr > ram_size) { + if (addr > ms->ram_size) { return; } @@ -2124,11 +2127,12 @@ void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2) /* reset reference bit extended */ uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2) { + MachineState *ms = MACHINE(qdev_get_machine()); static S390SKeysState *ss; static S390SKeysClass *skeyclass; uint8_t re, key; - if (r2 > ram_size) { + if (r2 > ms->ram_size) { return 0; } diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c index 7d9f3059cd..d492b23a17 100644 --- a/target/s390x/mmu_helper.c +++ b/target/s390x/mmu_helper.c @@ -27,6 +27,7 @@ #include "trace.h" #include "hw/hw.h" #include "hw/s390x/storage-keys.h" +#include "hw/boards.h" /* Fetch/store bits in the translation exception code: */ #define FS_READ 0x800 @@ -292,10 +293,11 @@ static void mmu_handle_skey(target_ulong addr, int rw, int *flags) { static S390SKeysClass *skeyclass; static S390SKeysState *ss; + MachineState *ms = MACHINE(qdev_get_machine()); uint8_t key; int rc; - if (unlikely(addr >= ram_size)) { + if (unlikely(addr >= ms->ram_size)) { return; } -- cgit v1.2.3