author     Peter Maydell <peter.maydell@linaro.org>    2015-07-06 23:37:53 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2015-07-06 23:37:53 +0100
commit     f6e3035f75e5c6a73485335765ae070304c7a110 (patch)
tree       60f388e6585b7075c1a721e14c6ce12f1bd394de
parent     7edd8e4660beb301d527257f8e04ebec0f841cb0 (diff)
parent     355023f2010c4df619d88a0dd7012b4b9c74c12c (diff)
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream-smm' into staging
This series implements KVM support for SMM, and lets you enable/disable
it through the "smm" property of x86 machine types.
# gpg: Signature made Mon Jul 6 17:41:05 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream-smm:
pc: add SMM property
ich9: add smm_enabled field and arguments
pc_piix: rename kvm_enabled to smm_enabled
target-i386: register a separate KVM address space including SMRAM regions
kvm-all: kvm_irqchip_create is not expected to fail
kvm-all: add support for multiple address spaces
kvm-all: make KVM's memory listener more generic
kvm-all: move internal types to kvm_int.h
kvm-all: remove useless typedef
kvm-all: put kvm_mem_flags to more work
target-i386: add support for SMBASE MSR and SMIs
piix4/ich9: do not raise SMI on ACPI enable/disable commands
linux-headers: Update to 4.2-rc1
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  hw/acpi/ich9.c                                   |   5
-rw-r--r--  hw/acpi/piix4.c                                  |  13
-rw-r--r--  hw/i386/pc.c                                     |  51
-rw-r--r--  hw/i386/pc_piix.c                                |   7
-rw-r--r--  hw/i386/pc_q35.c                                 |   6
-rw-r--r--  hw/isa/lpc_ich9.c                                |   9
-rw-r--r--  include/hw/acpi/ich9.h                           |   3
-rw-r--r--  include/hw/i386/ich9.h                           |   2
-rw-r--r--  include/hw/i386/pc.h                             |   5
-rw-r--r--  include/standard-headers/linux/input.h           |  10
-rw-r--r--  include/standard-headers/linux/virtio_balloon.h  |   1
-rw-r--r--  include/standard-headers/linux/virtio_gpu.h      |   2
-rw-r--r--  include/sysemu/kvm_int.h                         |  39
-rw-r--r--  kvm-all.c                                        | 243
-rw-r--r--  linux-headers/asm-x86/hyperv.h                   |  11
-rw-r--r--  linux-headers/linux/kvm.h                        |   2
-rw-r--r--  linux-headers/linux/vfio.h                       | 102
-rw-r--r--  linux-headers/linux/virtio_pci.h                 | 192
-rw-r--r--  target-i386/cpu.h                                |   1
-rw-r--r--  target-i386/kvm-stub.c                           |   5
-rw-r--r--  target-i386/kvm.c                                | 151
-rw-r--r--  target-i386/kvm_i386.h                           |   1
22 files changed, 517 insertions, 344 deletions
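The core plumbing of the series is the per-address-space KVMMemoryListener added in
include/sysemu/kvm_int.h and kvm-all.c below; target-i386 then registers a second
listener for an SMRAM-aware view of guest memory. A condensed sketch of that pattern
follows; it mirrors the register_smram_listener() hunk in target-i386/kvm.c further
down and is illustrative rather than a verbatim copy of the patch:

    /* QEMU-internal sketch; relies on "sysemu/kvm_int.h" plus the memory API
     * ("exec/memory.h", "exec/address-spaces.h"). */
    static KVMMemoryListener smram_listener;
    static AddressSpace smram_address_space;
    static MemoryRegion smram_as_root, smram_as_mem;

    static void register_smram_listener_sketch(void)
    {
        MemoryRegion *smram =
            (MemoryRegion *) object_resolve_path("/machine/smram", NULL);

        /* Outer container covering the whole guest-physical space... */
        memory_region_init(&smram_as_root, OBJECT(kvm_state),
                           "mem-container-smram", ~0ull);

        /* ...normal system memory aliased in at low priority... */
        memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram",
                                 get_system_memory(), 0, ~0ull);
        memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);

        /* ...and SMRAM, if the chipset created it, overlaid at higher priority. */
        if (smram) {
            memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
        }

        address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM");

        /* as_id 1: kvm-all.c folds this into the upper 16 bits of each KVM
         * memory-slot number, giving SMM code its own set of slots. */
        kvm_memory_listener_register(kvm_state, &smram_listener,
                                     &smram_address_space, 1);
    }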
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 8a64ffb38f..f4dc7a84be 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -192,7 +192,7 @@ static void pm_reset(void *opaque) acpi_pm_tmr_reset(&pm->acpi_regs); acpi_gpe_reset(&pm->acpi_regs); - if (kvm_enabled()) { + if (!pm->smm_enabled) { /* Mark SMM as already inited to prevent SMM from running. KVM does not * support SMM mode. */ pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN; @@ -209,7 +209,7 @@ static void pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&pm->acpi_regs); } -void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, +void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled, qemu_irq sci_irq) { memory_region_init(&pm->io, OBJECT(lpc_pci), "ich9-pm", ICH9_PMIO_SIZE); @@ -231,6 +231,7 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, "acpi-smi", 8); memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi); + pm->smm_enabled = smm_enabled; pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 3bd1d5a865..2cd2fee897 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -72,7 +72,7 @@ typedef struct PIIX4PMState { qemu_irq irq; qemu_irq smi_irq; - int kvm_enabled; + int smm_enabled; Notifier machine_ready; Notifier powerdown_notifier; @@ -112,6 +112,9 @@ static void apm_ctrl_changed(uint32_t val, void *arg) /* ACPI specs 3.0, 4.7.2.5 */ acpi_pm1_cnt_update(&s->ar, val == ACPI_ENABLE, val == ACPI_DISABLE); + if (val == ACPI_ENABLE || val == ACPI_DISABLE) { + return; + } if (d->config[0x5b] & (1 << 1)) { if (s->smi_irq) { @@ -319,7 +322,7 @@ static void piix4_reset(void *opaque) pci_conf[0x40] = 0x01; /* PM io base read only bit */ pci_conf[0x80] = 0; - if (s->kvm_enabled) { + if (!s->smm_enabled) { /* Mark SMM as already inited (until KVM supports SMM). */ pci_conf[0x5B] = 0x02; } @@ -450,7 +453,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) /* APM */ apm_init(dev, &s->apm, apm_ctrl_changed, s); - if (s->kvm_enabled) { + if (!s->smm_enabled) { /* Mark SMM as already inited to prevent SMM from running. KVM does not * support SMM mode. 
*/ pci_conf[0x5B] = 0x02; @@ -501,7 +504,7 @@ Object *piix4_pm_find(void) I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, DeviceState **piix4_pm) + int smm_enabled, DeviceState **piix4_pm) { DeviceState *dev; PIIX4PMState *s; @@ -515,7 +518,7 @@ I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, s = PIIX4_PM(dev); s->irq = sci_irq; s->smi_irq = smi_irq; - s->kvm_enabled = kvm_enabled; + s->smm_enabled = smm_enabled; if (xen_enabled()) { s->use_acpi_pci_hotplug = false; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index a66416d188..7959b44b6b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1768,6 +1768,48 @@ static void pc_machine_set_vmport(Object *obj, Visitor *v, void *opaque, visit_type_OnOffAuto(v, &pcms->vmport, name, errp); } +bool pc_machine_is_smm_enabled(PCMachineState *pcms) +{ + bool smm_available = false; + + if (pcms->smm == ON_OFF_AUTO_OFF) { + return false; + } + + if (tcg_enabled() || qtest_enabled()) { + smm_available = true; + } else if (kvm_enabled()) { + smm_available = kvm_has_smm(); + } + + if (smm_available) { + return true; + } + + if (pcms->smm == ON_OFF_AUTO_ON) { + error_report("System Management Mode not supported by this hypervisor."); + exit(1); + } + return false; +} + +static void pc_machine_get_smm(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + OnOffAuto smm = pcms->smm; + + visit_type_OnOffAuto(v, &smm, name, errp); +} + +static void pc_machine_set_smm(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + visit_type_OnOffAuto(v, &pcms->smm, name, errp); +} + static bool pc_machine_get_aligned_dimm(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1792,6 +1834,15 @@ static void pc_machine_initfn(Object *obj) "Maximum ram below the 4G boundary (32bit boundary)", NULL); + pcms->smm = ON_OFF_AUTO_AUTO; + object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto", + pc_machine_get_smm, + pc_machine_set_smm, + NULL, NULL, NULL); + object_property_set_description(obj, PC_MACHINE_SMM, + "Enable SMM (pc & q35)", + NULL); + pcms->vmport = ON_OFF_AUTO_AUTO; object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto", pc_machine_get_vmport, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index e142f75649..56cdcb9661 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -287,7 +287,8 @@ static void pc_init1(MachineState *machine) /* TODO: Populate SPD eeprom data. 
*/ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, gsi[9], smi_irq, - kvm_enabled(), &piix4_pm); + pc_machine_is_smm_enabled(pc_machine), + &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, @@ -306,7 +307,11 @@ static void pc_init1(MachineState *machine) static void pc_compat_2_3(MachineState *machine) { + PCMachineState *pcms = PC_MACHINE(machine); savevm_skip_section_footers(); + if (kvm_enabled()) { + pcms->smm = ON_OFF_AUTO_OFF; + } } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 082cd93bb2..8aa3a67fdf 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -253,7 +253,7 @@ static void pc_q35_init(MachineState *machine) (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104); /* connect pm stuff to lpc */ - ich9_lpc_pm_init(lpc); + ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine)); /* ahci and SATA device, for q35 1 ahci controller is built-in */ ahci = pci_create_simple_multifunction(host_bus, @@ -290,7 +290,11 @@ static void pc_q35_init(MachineState *machine) static void pc_compat_2_3(MachineState *machine) { + PCMachineState *pcms = PC_MACHINE(machine); savevm_skip_section_footers(); + if (kvm_enabled()) { + pcms->smm = ON_OFF_AUTO_OFF; + } } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index b3e0b1fd52..bd655b8405 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -357,11 +357,13 @@ static void ich9_set_sci(void *opaque, int irq_num, int level) } } -void ich9_lpc_pm_init(PCIDevice *lpc_pci) +void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci); + qemu_irq sci_irq; - ich9_pm_init(lpc_pci, &lpc->pm, qemu_allocate_irq(ich9_set_sci, lpc, 0)); + sci_irq = qemu_allocate_irq(ich9_set_sci, lpc, 0); + ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, sci_irq); ich9_lpc_reset(&lpc->d.qdev); } @@ -375,6 +377,9 @@ static void ich9_apm_ctrl_changed(uint32_t val, void *arg) acpi_pm1_cnt_update(&lpc->pm.acpi_regs, val == ICH9_APM_ACPI_ENABLE, val == ICH9_APM_ACPI_DISABLE); + if (val == ICH9_APM_ACPI_ENABLE || val == ICH9_APM_ACPI_DISABLE) { + return; + } /* SMI_EN = PMBASE + 30. 
SMI control and enable register */ if (lpc->pm.smi_en & ICH9_PMIO_SMI_EN_APMC_EN) { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 77cc65cbc2..ac24bbe9a3 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -54,10 +54,11 @@ typedef struct ICH9LPCPMRegs { uint8_t disable_s3; uint8_t disable_s4; uint8_t s4_val; + uint8_t smm_enabled; } ICH9LPCPMRegs; void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, - qemu_irq sci_irq); + bool smm_enabled, qemu_irq sci_irq); void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base); extern const VMStateDescription vmstate_ich9_pm; diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index a2cc15c915..b317a481c8 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -17,7 +17,7 @@ void ich9_lpc_set_irq(void *opaque, int irq_num, int level); int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx); PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin); -void ich9_lpc_pm_init(PCIDevice *pci_lpc); +void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled); I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); #define ICH9_CC_SIZE (16 * 1024) /* 16KB */ diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 328c8f72e0..786a1d511c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -37,6 +37,7 @@ struct PCMachineState { uint64_t max_ram_below_4g; OnOffAuto vmport; + OnOffAuto smm; bool enforce_aligned_dimm; }; @@ -44,6 +45,7 @@ struct PCMachineState { #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size" #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define PC_MACHINE_VMPORT "vmport" +#define PC_MACHINE_SMM "smm" #define PC_MACHINE_ENFORCE_ALIGNED_DIMM "enforce-aligned-dimm" /** @@ -155,6 +157,7 @@ void i8042_setup_a20_line(ISADevice *dev, qemu_irq *a20_out); /* pc.c */ extern int fd_bootchk; +bool pc_machine_is_smm_enabled(PCMachineState *pcms); void pc_register_ferr_irq(qemu_irq irq); void pc_acpi_smi_interrupt(void *opaque, int irq, int level); @@ -214,7 +217,7 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, DeviceState **piix4_pm); + int smm_enabled, DeviceState **piix4_pm); void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index b94d365f28..a459dd25da 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -367,7 +367,8 @@ struct input_keymap_entry { #define KEY_MSDOS 151 #define KEY_COFFEE 152 /* AL Terminal Lock/Screensaver */ #define KEY_SCREENLOCK KEY_COFFEE -#define KEY_DIRECTION 153 +#define KEY_ROTATE_DISPLAY 153 /* Display orientation for e.g. 
tablets */ +#define KEY_DIRECTION KEY_ROTATE_DISPLAY #define KEY_CYCLEWINDOWS 154 #define KEY_MAIL 155 #define KEY_BOOKMARKS 156 /* AC Bookmarks */ @@ -700,6 +701,10 @@ struct input_keymap_entry { #define KEY_NUMERIC_9 0x209 #define KEY_NUMERIC_STAR 0x20a #define KEY_NUMERIC_POUND 0x20b +#define KEY_NUMERIC_A 0x20c /* Phone key A - HUT Telephony 0xb9 */ +#define KEY_NUMERIC_B 0x20d +#define KEY_NUMERIC_C 0x20e +#define KEY_NUMERIC_D 0x20f #define KEY_CAMERA_FOCUS 0x210 #define KEY_WPS_BUTTON 0x211 /* WiFi Protected Setup key */ @@ -971,7 +976,8 @@ struct input_keymap_entry { */ #define MT_TOOL_FINGER 0 #define MT_TOOL_PEN 1 -#define MT_TOOL_MAX 1 +#define MT_TOOL_PALM 2 +#define MT_TOOL_MAX 2 /* * Values describing the status of a force-feedback effect diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h index 88ada1d048..2e2a6dcf3a 100644 --- a/include/standard-headers/linux/virtio_balloon.h +++ b/include/standard-headers/linux/virtio_balloon.h @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index cfcfb463fc..72ef815f51 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -38,6 +38,8 @@ #ifndef VIRTIO_GPU_HW_H #define VIRTIO_GPU_HW_H +#include "standard-headers/linux/types.h" + enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h new file mode 100644 index 0000000000..888557a1ca --- /dev/null +++ b/include/sysemu/kvm_int.h @@ -0,0 +1,39 @@ +/* + * Internal definitions for a target's KVM support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_KVM_INT_H +#define QEMU_KVM_INT_H + +#include "sysemu/sysemu.h" +#include "sysemu/accel.h" +#include "sysemu/kvm.h" + +typedef struct KVMSlot +{ + hwaddr start_addr; + ram_addr_t memory_size; + void *ram; + int slot; + int flags; +} KVMSlot; + +typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; +} KVMMemoryListener; + +#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") + +#define KVM_STATE(obj) \ + OBJECT_CHECK(KVMState, (obj), TYPE_KVM_ACCEL) + +void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id); + +#endif @@ -24,13 +24,11 @@ #include "qemu/atomic.h" #include "qemu/option.h" #include "qemu/config-file.h" -#include "sysemu/sysemu.h" -#include "sysemu/accel.h" #include "hw/hw.h" #include "hw/pci/msi.h" #include "hw/s390x/adapter.h" #include "exec/gdbstub.h" -#include "sysemu/kvm.h" +#include "sysemu/kvm_int.h" #include "qemu/bswap.h" #include "exec/memory.h" #include "exec/ram_addr.h" @@ -60,22 +58,10 @@ #define KVM_MSI_HASHTAB_SIZE 256 -typedef struct KVMSlot -{ - hwaddr start_addr; - ram_addr_t memory_size; - void *ram; - int slot; - int flags; -} KVMSlot; - -typedef struct kvm_dirty_log KVMDirtyLog; - struct KVMState { AccelState parent_obj; - KVMSlot *slots; int nr_slots; int fd; int vmfd; @@ -106,13 +92,9 @@ struct KVMState QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; bool direct_msi; #endif + KVMMemoryListener memory_listener; }; -#define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") - -#define KVM_STATE(obj) \ - OBJECT_CHECK(KVMState, (obj), TYPE_KVM_ACCEL) - KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; @@ -133,13 +115,14 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_LAST_INFO }; -static KVMSlot *kvm_get_free_slot(KVMState *s) +static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml) { + KVMState *s = kvm_state; int i; for (i = 0; i < s->nr_slots; i++) { - if (s->slots[i].memory_size == 0) { - return &s->slots[i]; + if (kml->slots[i].memory_size == 0) { + return &kml->slots[i]; } } @@ -148,12 +131,14 @@ static KVMSlot *kvm_get_free_slot(KVMState *s) bool kvm_has_free_slot(MachineState *ms) { - return kvm_get_free_slot(KVM_STATE(ms->accelerator)); + KVMState *s = KVM_STATE(ms->accelerator); + + return kvm_get_free_slot(&s->memory_listener); } -static KVMSlot *kvm_alloc_slot(KVMState *s) +static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml) { - KVMSlot *slot = kvm_get_free_slot(s); + KVMSlot *slot = kvm_get_free_slot(kml); if (slot) { return slot; @@ -163,14 +148,15 @@ static KVMSlot *kvm_alloc_slot(KVMState *s) abort(); } -static KVMSlot *kvm_lookup_matching_slot(KVMState *s, +static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml, hwaddr start_addr, hwaddr end_addr) { + KVMState *s = kvm_state; int i; for (i = 0; i < s->nr_slots; i++) { - KVMSlot *mem = &s->slots[i]; + KVMSlot *mem = &kml->slots[i]; if (start_addr == mem->start_addr && end_addr == mem->start_addr + mem->memory_size) { @@ -184,15 +170,16 @@ static KVMSlot *kvm_lookup_matching_slot(KVMState *s, /* * Find overlapping slot with lowest start address */ -static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s, +static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml, hwaddr start_addr, hwaddr end_addr) { + KVMState *s = kvm_state; KVMSlot *found = NULL; int i; for (i = 0; i < s->nr_slots; i++) { - KVMSlot *mem = &s->slots[i]; + KVMSlot *mem = &kml->slots[i]; if (mem->memory_size == 0 || (found && 
found->start_addr < mem->start_addr)) { @@ -211,10 +198,11 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s, int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, hwaddr *phys_addr) { + KVMMemoryListener *kml = &s->memory_listener; int i; for (i = 0; i < s->nr_slots; i++) { - KVMSlot *mem = &s->slots[i]; + KVMSlot *mem = &kml->slots[i]; if (ram >= mem->ram && ram < mem->ram + mem->memory_size) { *phys_addr = mem->start_addr + (ram - mem->ram); @@ -225,11 +213,12 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, return 0; } -static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) +static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot) { + KVMState *s = kvm_state; struct kvm_userspace_memory_region mem; - mem.slot = slot->slot; + mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; mem.flags = slot->flags; @@ -291,45 +280,47 @@ err: * dirty pages logging control */ -static int kvm_mem_flags(KVMState *s, bool log_dirty, bool readonly) +static int kvm_mem_flags(MemoryRegion *mr) { + bool readonly = mr->readonly || memory_region_is_romd(mr); int flags = 0; - flags = log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0; + + if (memory_region_get_dirty_log_mask(mr) != 0) { + flags |= KVM_MEM_LOG_DIRTY_PAGES; + } if (readonly && kvm_readonly_mem_allowed) { flags |= KVM_MEM_READONLY; } return flags; } -static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty) +static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, + MemoryRegion *mr) { - KVMState *s = kvm_state; - int flags, mask = KVM_MEM_LOG_DIRTY_PAGES; int old_flags; old_flags = mem->flags; - - flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty, false); - mem->flags = flags; + mem->flags = kvm_mem_flags(mr); /* If nothing changed effectively, no need to issue ioctl */ - if (flags == old_flags) { + if (mem->flags == old_flags) { return 0; } - return kvm_set_user_memory_region(s, mem); + return kvm_set_user_memory_region(kml, mem); } -static int kvm_dirty_pages_log_change(hwaddr phys_addr, - ram_addr_t size, bool log_dirty) +static int kvm_section_update_flags(KVMMemoryListener *kml, + MemoryRegionSection *section) { - KVMState *s = kvm_state; - KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size); + hwaddr phys_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + KVMSlot *mem = kvm_lookup_matching_slot(kml, phys_addr, phys_addr + size); if (mem == NULL) { return 0; } else { - return kvm_slot_dirty_pages_log_change(mem, log_dirty); + return kvm_slot_update_flags(kml, mem, section->mr); } } @@ -337,14 +328,14 @@ static void kvm_log_start(MemoryListener *listener, MemoryRegionSection *section, int old, int new) { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); int r; if (old != 0) { return; } - r = kvm_dirty_pages_log_change(section->offset_within_address_space, - int128_get64(section->size), true); + r = kvm_section_update_flags(kml, section); if (r < 0) { abort(); } @@ -354,14 +345,14 @@ static void kvm_log_stop(MemoryListener *listener, MemoryRegionSection *section, int old, int new) { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); int r; if (new != 0) { return; } - r = kvm_dirty_pages_log_change(section->offset_within_address_space, - int128_get64(section->size), false); + r = kvm_section_update_flags(kml, section); if (r < 0) { abort(); } @@ -389,11 
+380,12 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, * @start_add: start of logged region. * @end_addr: end of logged region. */ -static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section) +static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + MemoryRegionSection *section) { KVMState *s = kvm_state; unsigned long size, allocated_size = 0; - KVMDirtyLog d = {}; + struct kvm_dirty_log d = {}; KVMSlot *mem; int ret = 0; hwaddr start_addr = section->offset_within_address_space; @@ -401,7 +393,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section) d.dirty_bitmap = NULL; while (start_addr < end_addr) { - mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr); + mem = kvm_lookup_overlapping_slot(kml, start_addr, end_addr); if (mem == NULL) { break; } @@ -428,8 +420,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section) allocated_size = size; memset(d.dirty_bitmap, 0, allocated_size); - d.slot = mem->slot; - + d.slot = mem->slot | (kml->as_id << 16); if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) { DPRINTF("ioctl failed %d\n", errno); ret = -1; @@ -632,15 +623,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) return NULL; } -static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) +static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) { KVMState *s = kvm_state; KVMSlot *mem, old; int err; MemoryRegion *mr = section->mr; - bool log_dirty = memory_region_get_dirty_log_mask(mr) != 0; bool writeable = !mr->readonly && !mr->rom_device; - bool readonly_flag = mr->readonly || memory_region_is_romd(mr); hwaddr start_addr = section->offset_within_address_space; ram_addr_t size = int128_get64(section->size); void *ram = NULL; @@ -674,7 +664,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta; while (1) { - mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size); + mem = kvm_lookup_overlapping_slot(kml, start_addr, start_addr + size); if (!mem) { break; } @@ -684,19 +674,19 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) (ram - start_addr == mem->ram - mem->start_addr)) { /* The new slot fits into the existing one and comes with * identical parameters - update flags and done. */ - kvm_slot_dirty_pages_log_change(mem, log_dirty); + kvm_slot_update_flags(kml, mem, mr); return; } old = *mem; if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - kvm_physical_sync_dirty_bitmap(section); + kvm_physical_sync_dirty_bitmap(kml, section); } /* unregister the overlapping slot */ mem->memory_size = 0; - err = kvm_set_user_memory_region(s, mem); + err = kvm_set_user_memory_region(kml, mem); if (err) { fprintf(stderr, "%s: error unregistering overlapping slot: %s\n", __func__, strerror(-err)); @@ -713,13 +703,13 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) * - and actually require a recent KVM version. 
*/ if (s->broken_set_mem_region && old.start_addr == start_addr && old.memory_size < size && add) { - mem = kvm_alloc_slot(s); + mem = kvm_alloc_slot(kml); mem->memory_size = old.memory_size; mem->start_addr = old.start_addr; mem->ram = old.ram; - mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag); + mem->flags = kvm_mem_flags(mr); - err = kvm_set_user_memory_region(s, mem); + err = kvm_set_user_memory_region(kml, mem); if (err) { fprintf(stderr, "%s: error updating slot: %s\n", __func__, strerror(-err)); @@ -734,13 +724,13 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) /* register prefix slot */ if (old.start_addr < start_addr) { - mem = kvm_alloc_slot(s); + mem = kvm_alloc_slot(kml); mem->memory_size = start_addr - old.start_addr; mem->start_addr = old.start_addr; mem->ram = old.ram; - mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag); + mem->flags = kvm_mem_flags(mr); - err = kvm_set_user_memory_region(s, mem); + err = kvm_set_user_memory_region(kml, mem); if (err) { fprintf(stderr, "%s: error registering prefix slot: %s\n", __func__, strerror(-err)); @@ -757,14 +747,14 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) if (old.start_addr + old.memory_size > start_addr + size) { ram_addr_t size_delta; - mem = kvm_alloc_slot(s); + mem = kvm_alloc_slot(kml); mem->start_addr = start_addr + size; size_delta = mem->start_addr - old.start_addr; mem->memory_size = old.memory_size - size_delta; mem->ram = old.ram + size_delta; - mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag); + mem->flags = kvm_mem_flags(mr); - err = kvm_set_user_memory_region(s, mem); + err = kvm_set_user_memory_region(kml, mem); if (err) { fprintf(stderr, "%s: error registering suffix slot: %s\n", __func__, strerror(-err)); @@ -780,13 +770,13 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) if (!add) { return; } - mem = kvm_alloc_slot(s); + mem = kvm_alloc_slot(kml); mem->memory_size = size; mem->start_addr = start_addr; mem->ram = ram; - mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag); + mem->flags = kvm_mem_flags(mr); - err = kvm_set_user_memory_region(s, mem); + err = kvm_set_user_memory_region(kml, mem); if (err) { fprintf(stderr, "%s: error registering slot: %s\n", __func__, strerror(-err)); @@ -797,23 +787,28 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) static void kvm_region_add(MemoryListener *listener, MemoryRegionSection *section) { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); + memory_region_ref(section->mr); - kvm_set_phys_mem(section, true); + kvm_set_phys_mem(kml, section, true); } static void kvm_region_del(MemoryListener *listener, MemoryRegionSection *section) { - kvm_set_phys_mem(section, false); + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); + + kvm_set_phys_mem(kml, section, false); memory_region_unref(section->mr); } static void kvm_log_sync(MemoryListener *listener, MemoryRegionSection *section) { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); int r; - r = kvm_physical_sync_dirty_bitmap(section); + r = kvm_physical_sync_dirty_bitmap(kml, section); if (r < 0) { abort(); } @@ -888,18 +883,27 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener, } } -static MemoryListener kvm_memory_listener = { - .region_add = kvm_region_add, - .region_del = kvm_region_del, - .log_start = kvm_log_start, - .log_stop = kvm_log_stop, - .log_sync = kvm_log_sync, - .eventfd_add = 
kvm_mem_ioeventfd_add, - .eventfd_del = kvm_mem_ioeventfd_del, - .coalesced_mmio_add = kvm_coalesce_mmio_region, - .coalesced_mmio_del = kvm_uncoalesce_mmio_region, - .priority = 10, -}; +void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id) +{ + int i; + + kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot)); + kml->as_id = as_id; + + for (i = 0; i < s->nr_slots; i++) { + kml->slots[i].slot = i; + } + + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.log_sync = kvm_log_sync; + kml->listener.priority = 10; + + memory_listener_register(&kml->listener, as); +} static MemoryListener kvm_io_listener = { .eventfd_add = kvm_io_ioeventfd_add, @@ -1341,27 +1345,31 @@ int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) false); } -static int kvm_irqchip_create(MachineState *machine, KVMState *s) +static void kvm_irqchip_create(MachineState *machine, KVMState *s) { int ret; - if (!machine_kernel_irqchip_allowed(machine) || - (!kvm_check_extension(s, KVM_CAP_IRQCHIP) && - (kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0) < 0))) { - return 0; + if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) { + ; + } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) { + ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0); + if (ret < 0) { + fprintf(stderr, "Enable kernel irqchip failed: %s\n", strerror(-ret)); + exit(1); + } + } else { + return; } /* First probe and see if there's a arch-specific hook to create the * in-kernel irqchip for us */ ret = kvm_arch_irqchip_create(s); - if (ret < 0) { - return ret; - } else if (ret == 0) { + if (ret == 0) { ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); - if (ret < 0) { - fprintf(stderr, "Create kernel irqchip failed\n"); - return ret; - } + } + if (ret < 0) { + fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret)); + exit(1); } kvm_kernel_irqchip = true; @@ -1372,8 +1380,6 @@ static int kvm_irqchip_create(MachineState *machine, KVMState *s) kvm_halt_in_kernel_allowed = true; kvm_init_irq_routing(s); - - return 0; } /* Find number of supported CPUs using the recommended @@ -1410,7 +1416,7 @@ static int kvm_init(MachineState *ms) KVMState *s; const KVMCapabilityInfo *missing_cap; int ret; - int i, type = 0; + int type = 0; const char *kvm_type; s = KVM_STATE(ms->accelerator); @@ -1459,12 +1465,6 @@ static int kvm_init(MachineState *ms) s->nr_slots = 32; } - s->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot)); - - for (i = 0; i < s->nr_slots; i++) { - s->slots[i].slot = i; - } - /* check the vcpu limits */ soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -1596,14 +1596,21 @@ static int kvm_init(MachineState *ms) goto err; } - ret = kvm_irqchip_create(ms, s); - if (ret < 0) { - goto err; + if (machine_kernel_irqchip_allowed(ms)) { + kvm_irqchip_create(ms, s); } kvm_state = s; - memory_listener_register(&kvm_memory_listener, &address_space_memory); - memory_listener_register(&kvm_io_listener, &address_space_io); + + s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; + s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; + s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region; + s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region; + + kvm_memory_listener_register(s, &s->memory_listener, + &address_space_memory, 0); + 
memory_listener_register(&kvm_io_listener, + &address_space_io); s->many_ioeventfds = kvm_check_many_ioeventfds(); @@ -1619,7 +1626,7 @@ err: if (s->fd != -1) { close(s->fd); } - g_free(s->slots); + g_free(s->memory_listener.slots); return ret; } diff --git a/linux-headers/asm-x86/hyperv.h b/linux-headers/asm-x86/hyperv.h index ce6068dbcf..8fba544e9c 100644 --- a/linux-headers/asm-x86/hyperv.h +++ b/linux-headers/asm-x86/hyperv.h @@ -199,6 +199,17 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_X64_MSR_CRASH_CTL_NOTIFY (1ULL << 63) +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index fad9e5c561..3bac8736d8 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -897,7 +897,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emlation. See Documentation/virtual/kvm/api.txt. + * emulation. See Documentation/virtual/kvm/api.txt. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 0508d0b5d2..aa276bce39 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -443,6 +445,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + +/* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into * the hardware so the guest has to know that information. @@ -452,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. 
*/ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -470,12 +492,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -492,9 +525,70 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. 
+ */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* VFIO_H */ diff --git a/linux-headers/linux/virtio_pci.h b/linux-headers/linux/virtio_pci.h deleted file mode 100644 index 92624e5310..0000000000 --- a/linux-headers/linux/virtio_pci.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Virtio PCI driver - * - * This module allows virtio devices to be used over a virtual PCI device. - * This can be used with QEMU based VMMs like KVM or Xen. - * - * Copyright IBM Corp. 2007 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _LINUX_VIRTIO_PCI_H -#define _LINUX_VIRTIO_PCI_H - -#include <linux/types.h> - -#ifndef VIRTIO_PCI_NO_LEGACY - -/* A 32-bit r/o bitmask of the features supported by the host */ -#define VIRTIO_PCI_HOST_FEATURES 0 - -/* A 32-bit r/w bitmask of features activated by the guest */ -#define VIRTIO_PCI_GUEST_FEATURES 4 - -/* A 32-bit r/w PFN for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_PFN 8 - -/* A 16-bit r/o queue size for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_NUM 12 - -/* A 16-bit r/w queue selector */ -#define VIRTIO_PCI_QUEUE_SEL 14 - -/* A 16-bit r/w queue notifier */ -#define VIRTIO_PCI_QUEUE_NOTIFY 16 - -/* An 8-bit device status register. */ -#define VIRTIO_PCI_STATUS 18 - -/* An 8-bit r/o interrupt status register. Reading the value will return the - * current contents of the ISR and will also clear it. This is effectively - * a read-and-acknowledge. */ -#define VIRTIO_PCI_ISR 19 - -/* MSI-X registers: only enabled if MSI-X is enabled. */ -/* A 16-bit vector for configuration changes. */ -#define VIRTIO_MSI_CONFIG_VECTOR 20 -/* A 16-bit vector for selected queue notifications. 
*/ -#define VIRTIO_MSI_QUEUE_VECTOR 22 - -/* The remaining space is defined by each driver as the per-driver - * configuration space */ -#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) -/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) - -/* Virtio ABI version, this must match exactly */ -#define VIRTIO_PCI_ABI_VERSION 0 - -/* How many bits to shift physical queue address written to QUEUE_PFN. - * 12 is historical, and due to x86 page size. */ -#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 - -/* The alignment to use between consumer and producer parts of vring. - * x86 pagesize again. */ -#define VIRTIO_PCI_VRING_ALIGN 4096 - -#endif /* VIRTIO_PCI_NO_LEGACY */ - -/* The bit of the ISR which indicates a device configuration change. */ -#define VIRTIO_PCI_ISR_CONFIG 0x2 -/* Vector value used to disable MSI for queue */ -#define VIRTIO_MSI_NO_VECTOR 0xffff - -#ifndef VIRTIO_PCI_NO_MODERN - -/* IDs for different capabilities. Must all exist. */ - -/* Common configuration */ -#define VIRTIO_PCI_CAP_COMMON_CFG 1 -/* Notifications */ -#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 -/* ISR access */ -#define VIRTIO_PCI_CAP_ISR_CFG 3 -/* Device specific confiuration */ -#define VIRTIO_PCI_CAP_DEVICE_CFG 4 - -/* This is the PCI capability header: */ -struct virtio_pci_cap { - __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ - __u8 cap_next; /* Generic PCI field: next ptr. */ - __u8 cap_len; /* Generic PCI field: capability length */ - __u8 cfg_type; /* Identifies the structure. */ - __u8 bar; /* Where to find it. */ - __u8 padding[3]; /* Pad to full dword. */ - __le32 offset; /* Offset within bar. */ - __le32 length; /* Length of the structure, in bytes. */ -}; - -struct virtio_pci_notify_cap { - struct virtio_pci_cap cap; - __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ -}; - -/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ -struct virtio_pci_common_cfg { - /* About the whole device. */ - __le32 device_feature_select; /* read-write */ - __le32 device_feature; /* read-only */ - __le32 guest_feature_select; /* read-write */ - __le32 guest_feature; /* read-write */ - __le16 msix_config; /* read-write */ - __le16 num_queues; /* read-only */ - __u8 device_status; /* read-write */ - __u8 config_generation; /* read-only */ - - /* About a specific virtqueue. */ - __le16 queue_select; /* read-write */ - __le16 queue_size; /* read-write, power of 2. */ - __le16 queue_msix_vector; /* read-write */ - __le16 queue_enable; /* read-write */ - __le16 queue_notify_off; /* read-only */ - __le32 queue_desc_lo; /* read-write */ - __le32 queue_desc_hi; /* read-write */ - __le32 queue_avail_lo; /* read-write */ - __le32 queue_avail_hi; /* read-write */ - __le32 queue_used_lo; /* read-write */ - __le32 queue_used_hi; /* read-write */ -}; - -/* Macro versions of offsets for the Old Timers! 
*/ -#define VIRTIO_PCI_CAP_VNDR 0 -#define VIRTIO_PCI_CAP_NEXT 1 -#define VIRTIO_PCI_CAP_LEN 2 -#define VIRTIO_PCI_CAP_CFG_TYPE 3 -#define VIRTIO_PCI_CAP_BAR 4 -#define VIRTIO_PCI_CAP_OFFSET 8 -#define VIRTIO_PCI_CAP_LENGTH 12 - -#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 - - -#define VIRTIO_PCI_COMMON_DFSELECT 0 -#define VIRTIO_PCI_COMMON_DF 4 -#define VIRTIO_PCI_COMMON_GFSELECT 8 -#define VIRTIO_PCI_COMMON_GF 12 -#define VIRTIO_PCI_COMMON_MSIX 16 -#define VIRTIO_PCI_COMMON_NUMQ 18 -#define VIRTIO_PCI_COMMON_STATUS 20 -#define VIRTIO_PCI_COMMON_CFGGENERATION 21 -#define VIRTIO_PCI_COMMON_Q_SELECT 22 -#define VIRTIO_PCI_COMMON_Q_SIZE 24 -#define VIRTIO_PCI_COMMON_Q_MSIX 26 -#define VIRTIO_PCI_COMMON_Q_ENABLE 28 -#define VIRTIO_PCI_COMMON_Q_NOFF 30 -#define VIRTIO_PCI_COMMON_Q_DESCLO 32 -#define VIRTIO_PCI_COMMON_Q_DESCHI 36 -#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 -#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 -#define VIRTIO_PCI_COMMON_Q_USEDLO 48 -#define VIRTIO_PCI_COMMON_Q_USEDHI 52 - -#endif /* VIRTIO_PCI_NO_MODERN */ - -#endif diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 603aaf0924..ac39291b48 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -314,6 +314,7 @@ #define MSR_P6_PERFCTR0 0xc1 +#define MSR_IA32_SMBASE 0x9e #define MSR_MTRRcap 0xfe #define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) diff --git a/target-i386/kvm-stub.c b/target-i386/kvm-stub.c index 2b9e8011fb..6fefd65c23 100644 --- a/target-i386/kvm-stub.c +++ b/target-i386/kvm-stub.c @@ -18,6 +18,11 @@ bool kvm_allows_irq0_override(void) } #ifndef __OPTIMIZE__ +bool kvm_has_smm(void) +{ + return 1; +} + /* This function is only called inside conditionals which we * rely on the compiler to optimize out when CONFIG_KVM is not * defined. diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 6426600c63..9038bf7077 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -22,7 +22,7 @@ #include "qemu-common.h" #include "sysemu/sysemu.h" -#include "sysemu/kvm.h" +#include "sysemu/kvm_int.h" #include "kvm_i386.h" #include "cpu.h" #include "exec/gdbstub.h" @@ -73,6 +73,7 @@ static bool has_msr_feature_control; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; +static bool has_msr_smbase; static bool has_msr_bndcfgs; static bool has_msr_kvm_steal_time; static int lm_capable_kernel; @@ -85,6 +86,11 @@ static bool has_msr_xss; static bool has_msr_architectural_pmu; static uint32_t num_architectural_pmu_counters; +bool kvm_has_smm(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); @@ -819,6 +825,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_tsc_deadline = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_SMBASE) { + has_msr_smbase = true; + continue; + } if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) { has_msr_misc_enable = true; continue; @@ -840,6 +850,40 @@ static int kvm_get_supported_msrs(KVMState *s) return ret; } +static Notifier smram_machine_done; +static KVMMemoryListener smram_listener; +static AddressSpace smram_address_space; +static MemoryRegion smram_as_root; +static MemoryRegion smram_as_mem; + +static void register_smram_listener(Notifier *n, void *unused) +{ + MemoryRegion *smram = + (MemoryRegion *) object_resolve_path("/machine/smram", NULL); + + /* Outer container... 
*/ + memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull); + memory_region_set_enabled(&smram_as_root, true); + + /* ... with two regions inside: normal system memory with low + * priority, and... + */ + memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram", + get_system_memory(), 0, ~0ull); + memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0); + memory_region_set_enabled(&smram_as_mem, true); + + if (smram) { + /* ... SMRAM with higher priority */ + memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10); + memory_region_set_enabled(smram, true); + } + + address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); + kvm_memory_listener_register(kvm_state, &smram_listener, + &smram_address_space, 1); +} + int kvm_arch_init(MachineState *ms, KVMState *s) { uint64_t identity_base = 0xfffbc000; @@ -898,6 +942,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s) return ret; } } + + if (kvm_check_extension(s, KVM_CAP_X86_SMM)) { + smram_machine_done.notify = register_smram_listener; + qemu_add_machine_init_done_notifier(&smram_machine_done); + } return 0; } @@ -1245,6 +1294,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE, env->msr_ia32_misc_enable); } + if (has_msr_smbase) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_SMBASE, env->smbase); + } if (has_msr_bndcfgs) { kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); } @@ -1606,6 +1658,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_misc_enable) { msrs[n++].index = MSR_IA32_MISC_ENABLE; } + if (has_msr_smbase) { + msrs[n++].index = MSR_IA32_SMBASE; + } if (has_msr_feature_control) { msrs[n++].index = MSR_IA32_FEATURE_CONTROL; } @@ -1760,6 +1815,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_MISC_ENABLE: env->msr_ia32_misc_enable = msrs[i].data; break; + case MSR_IA32_SMBASE: + env->smbase = msrs[i].data; + break; case MSR_IA32_FEATURE_CONTROL: env->msr_ia32_feature_control = msrs[i].data; break; @@ -1923,6 +1981,7 @@ static int kvm_put_apic(X86CPU *cpu) static int kvm_put_vcpu_events(X86CPU *cpu, int level) { + CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; struct kvm_vcpu_events events = {}; @@ -1947,6 +2006,24 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.sipi_vector = env->sipi_vector; + if (has_msr_smbase) { + events.smi.smm = !!(env->hflags & HF_SMM_MASK); + events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK); + if (kvm_irqchip_in_kernel()) { + /* As soon as these are moved to the kernel, remove them + * from cs->interrupt_request. + */ + events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI; + events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT; + cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); + } else { + /* Keep these in cs->interrupt_request. 
*/ + events.smi.pending = 0; + events.smi.latched_init = 0; + } + events.flags |= KVM_VCPUEVENT_VALID_SMM; + } + events.flags = 0; if (level >= KVM_PUT_RESET_STATE) { events.flags |= @@ -1966,6 +2043,7 @@ static int kvm_get_vcpu_events(X86CPU *cpu) return 0; } + memset(&events, 0, sizeof(events)); ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); if (ret < 0) { return ret; @@ -1987,6 +2065,29 @@ static int kvm_get_vcpu_events(X86CPU *cpu) env->hflags2 &= ~HF2_NMI_MASK; } + if (events.flags & KVM_VCPUEVENT_VALID_SMM) { + if (events.smi.smm) { + env->hflags |= HF_SMM_MASK; + } else { + env->hflags &= ~HF_SMM_MASK; + } + if (events.smi.pending) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); + } else { + cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); + } + if (events.smi.smm_inside_nmi) { + env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK; + } else { + env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; + } + if (events.smi.latched_init) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); + } else { + cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); + } + } + env->sipi_vector = events.sipi_vector; return 0; @@ -2190,16 +2291,28 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) int ret; /* Inject NMI */ - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - qemu_mutex_lock_iothread(); - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - qemu_mutex_unlock_iothread(); - - DPRINTF("injected NMI\n"); - ret = kvm_vcpu_ioctl(cpu, KVM_NMI); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", - strerror(-ret)); + if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + qemu_mutex_lock_iothread(); + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + qemu_mutex_unlock_iothread(); + DPRINTF("injected NMI\n"); + ret = kvm_vcpu_ioctl(cpu, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + qemu_mutex_lock_iothread(); + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + qemu_mutex_unlock_iothread(); + DPRINTF("injected SMI\n"); + ret = kvm_vcpu_ioctl(cpu, KVM_SMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n", + strerror(-ret)); + } } } @@ -2212,7 +2325,13 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) * pending TPR access reports. 
*/ if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - cpu->exit_request = 1; + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + cpu->exit_request = 1; + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->exit_request = 1; + } } if (!kvm_irqchip_in_kernel()) { @@ -2260,6 +2379,11 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; + if (run->flags & KVM_RUN_X86_SMM) { + env->hflags |= HF_SMM_MASK; + } else { + env->hflags &= HF_SMM_MASK; + } if (run->if_flag) { env->eflags |= IF_MASK; } else { @@ -2307,7 +2431,8 @@ int kvm_arch_process_async_events(CPUState *cs) } } - if (cs->interrupt_request & CPU_INTERRUPT_INIT) { + if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { kvm_cpu_synchronize_state(cs); do_cpu_init(cpu); } diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h index cac30fd381..e557e94f44 100644 --- a/target-i386/kvm_i386.h +++ b/target-i386/kvm_i386.h @@ -14,6 +14,7 @@ #include "sysemu/kvm.h" bool kvm_allows_irq0_override(void); +bool kvm_has_smm(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); |