Diffstat (limited to 'hw')
-rw-r--r--  hw/acpi/aml-build.c | 68
-rw-r--r--  hw/acpi/pcihp.c | 47
-rw-r--r--  hw/acpi/piix4.c | 40
-rw-r--r--  hw/arm/virt-acpi-build.c | 40
-rw-r--r--  hw/arm/virt.c | 2
-rw-r--r--  hw/core/machine.c | 2
-rw-r--r--  hw/core/qdev-properties.c | 176
-rw-r--r--  hw/display/virtio-gpu-pci.c | 7
-rw-r--r--  hw/display/virtio-vga.c | 7
-rw-r--r--  hw/i386/acpi-build.c | 52
-rw-r--r--  hw/i386/amd_iommu.c | 2
-rw-r--r--  hw/i386/intel_iommu.c | 72
-rw-r--r--  hw/i386/intel_iommu_internal.h | 3
-rw-r--r--  hw/i386/pc.c | 4
-rw-r--r--  hw/i386/pc_piix.c | 72
-rw-r--r--  hw/i386/pc_q35.c | 4
-rw-r--r--  hw/i386/trace-events | 6
-rw-r--r--  hw/i386/x86-iommu.c | 18
-rw-r--r--  hw/intc/Makefile.objs | 2
-rw-r--r--  hw/intc/spapr_xive.c | 1486
-rw-r--r--  hw/intc/xics_spapr.c | 3
-rw-r--r--  hw/intc/xive.c | 1599
-rw-r--r--  hw/net/vmxnet3.c | 116
-rw-r--r--  hw/net/vmxnet3_defs.h | 133
-rw-r--r--  hw/pci-bridge/gen_pcie_root_port.c | 4
-rw-r--r--  hw/pci-bridge/pci_bridge_dev.c | 31
-rw-r--r--  hw/pci-bridge/pcie_pci_bridge.c | 32
-rw-r--r--  hw/pci-bridge/pcie_root_port.c | 14
-rw-r--r--  hw/pci/pci.c | 4
-rw-r--r--  hw/pci/pci_bridge.c | 2
-rw-r--r--  hw/pci/pci_host.c | 26
-rw-r--r--  hw/pci/pcie.c | 159
-rw-r--r--  hw/pci/pcie_port.c | 5
-rw-r--r--  hw/pci/shpc.c | 25
-rw-r--r--  hw/ppc/e500.c | 18
-rw-r--r--  hw/ppc/mac_newworld.c | 30
-rw-r--r--  hw/ppc/ppc405_boards.c | 4
-rw-r--r--  hw/ppc/ppc405_uc.c | 4
-rw-r--r--  hw/ppc/ppc440_bamboo.c | 5
-rw-r--r--  hw/ppc/sam460ex.c | 2
-rw-r--r--  hw/ppc/spapr.c | 121
-rw-r--r--  hw/ppc/spapr_cpu_core.c | 4
-rw-r--r--  hw/ppc/spapr_iommu.c | 2
-rw-r--r--  hw/ppc/spapr_irq.c | 194
-rw-r--r--  hw/ppc/spapr_pci.c | 33
-rw-r--r--  hw/ppc/spapr_rtas_ddw.c | 19
-rw-r--r--  hw/ppc/spapr_vio.c | 2
-rw-r--r--  hw/ppc/virtex_ml507.c | 2
-rw-r--r--  hw/rdma/rdma_backend.c | 524
-rw-r--r--  hw/rdma/rdma_backend.h | 28
-rw-r--r--  hw/rdma/rdma_backend_defs.h | 19
-rw-r--r--  hw/rdma/rdma_rm.c | 120
-rw-r--r--  hw/rdma/rdma_rm.h | 17
-rw-r--r--  hw/rdma/rdma_rm_defs.h | 21
-rw-r--r--  hw/rdma/rdma_utils.c | 1
-rw-r--r--  hw/rdma/rdma_utils.h | 26
-rw-r--r--  hw/rdma/vmw/pvrdma.h | 10
-rw-r--r--  hw/rdma/vmw/pvrdma_cmd.c | 273
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.c | 29
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.h | 1
-rw-r--r--  hw/rdma/vmw/pvrdma_main.c | 70
-rw-r--r--  hw/rdma/vmw/pvrdma_qp_ops.c | 62
-rw-r--r--  hw/s390x/css.c | 32
-rw-r--r--  hw/s390x/s390-pci-bus.c | 12
-rw-r--r--  hw/smbios/smbios-stub.c | 2
-rw-r--r--  hw/smbios/smbios.c | 3
-rw-r--r--  hw/smbios/smbios_build.h | 4
-rw-r--r--  hw/smbios/smbios_type_38-stub.c | 2
-rw-r--r--  hw/smbios/smbios_type_38.c | 3
-rw-r--r--  hw/vfio/ap.c | 2
-rw-r--r--  hw/vfio/pci.c | 9
-rw-r--r--  hw/virtio/virtio-crypto-pci.c | 7
-rw-r--r--  hw/virtio/virtio-pci.c | 267
-rw-r--r--  hw/virtio/virtio-pci.h | 78
74 files changed, 5440 insertions(+), 885 deletions(-)
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 1e43cd736d..555c24f21d 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1589,6 +1589,74 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
g_array_free(tables->vmgenid, mfre);
}
+/*
+ * ACPI spec 5.2.5.3 Root System Description Pointer (RSDP).
+ * (Revision 1.0 or later)
+ */
+void
+build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data)
+{
+ int tbl_off = tbl->len; /* Table offset in the RSDP file */
+
+ switch (rsdp_data->revision) {
+ case 0:
+ /* With ACPI 1.0, we must have an RSDT pointer */
+ g_assert(rsdp_data->rsdt_tbl_offset);
+ break;
+ case 2:
+ /* With ACPI 2.0+, we must have an XSDT pointer */
+ g_assert(rsdp_data->xsdt_tbl_offset);
+ break;
+ default:
+ /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */
+ g_assert_not_reached();
+ }
+
+ bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16,
+ true /* fseg memory */);
+
+ g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */
+ build_append_int_noprefix(tbl, 0, 1); /* Checksum */
+ g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */
+ build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */
+ build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */
+ if (rsdp_data->rsdt_tbl_offset) {
+ /* RSDT address to be filled by guest linker */
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE,
+ tbl_off + 16, 4,
+ ACPI_BUILD_TABLE_FILE,
+ *rsdp_data->rsdt_tbl_offset);
+ }
+
+ /* Checksum to be filled by guest linker */
+ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
+ tbl_off, 20, /* ACPI rev 1.0 RSDP size */
+ 8);
+
+ if (rsdp_data->revision == 0) {
+ /* ACPI 1.0 RSDP, we're done */
+ return;
+ }
+
+ build_append_int_noprefix(tbl, 36, 4); /* Length */
+
+ /* XSDT address to be filled by guest linker */
+ build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */
+ /* We already validated our xsdt pointer */
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE,
+ tbl_off + 24, 8,
+ ACPI_BUILD_TABLE_FILE,
+ *rsdp_data->xsdt_tbl_offset);
+
+ build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */
+ build_append_int_noprefix(tbl, 0, 3); /* Reserved */
+
+ /* Extended checksum to be filled by Guest linker */
+ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
+ tbl_off, 36, /* ACPI rev 2.0 RSDP size */
+ 32);
+}
+
/* Build rsdt table */
void
build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets,
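For reference, the blob produced by the new build_rsdp() follows the classic RSDP layout; a minimal packed-struct sketch of it, with field offsets per ACPI 5.2.5.3 (illustrative only, not part of the patch):

/* Illustrative, packed view of the RSDP blob built above (not patch content) */
typedef struct QEMU_PACKED {
    char     signature[8];      /* offset  0: "RSD PTR "                           */
    uint8_t  checksum;          /* offset  8: guest linker patches, covers 0..19   */
    char     oem_id[6];         /* offset  9: rsdp_data->oem_id                    */
    uint8_t  revision;          /* offset 15: 0 (ACPI 1.0) or 2 (ACPI 2.0+)        */
    uint32_t rsdt_address;      /* offset 16: patched when rsdt_tbl_offset is set  */
    /* revision 2 fields only: */
    uint32_t length;            /* offset 20: 36                                   */
    uint64_t xsdt_address;      /* offset 24: patched when xsdt_tbl_offset is set  */
    uint8_t  extended_checksum; /* offset 32: guest linker patches, covers 0..35   */
    uint8_t  reserved[3];       /* offset 33                                       */
} RsdpLayoutSketch;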
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 80d42e12ff..7bc7a72340 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -30,6 +30,7 @@
#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
#include "hw/acpi/acpi.h"
#include "sysemu/sysemu.h"
#include "exec/address-spaces.h"
@@ -153,6 +154,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev)
static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots)
{
+ HotplugHandler *hotplug_ctrl;
BusChild *kid, *next;
int slot = ctz32(slots);
PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel);
@@ -170,7 +172,8 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo
PCIDevice *dev = PCI_DEVICE(qdev);
if (PCI_SLOT(dev->devfn) == slot) {
if (!acpi_pcihp_pc_no_hotplug(s, dev)) {
- object_unparent(OBJECT(qdev));
+ hotplug_ctrl = qdev_get_hotplug_handler(qdev);
+ hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort);
}
}
}
@@ -217,25 +220,48 @@ void acpi_pcihp_reset(AcpiPciHpState *s)
acpi_pcihp_update(s);
}
-void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
- DeviceState *dev, Error **errp)
+void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
- PCIDevice *pdev = PCI_DEVICE(dev);
- int slot = PCI_SLOT(pdev->devfn);
- int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
- if (bsel < 0) {
+ /* Only hotplugged devices need the hotplug capability. */
+ if (dev->hotplugged &&
+ acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) {
error_setg(errp, "Unsupported bus. Bus doesn't have property '"
ACPI_PCIHP_PROP_BSEL "' set");
return;
}
+}
+
+void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
+ DeviceState *dev, Error **errp)
+{
+ PCIDevice *pdev = PCI_DEVICE(dev);
+ int slot = PCI_SLOT(pdev->devfn);
+ int bsel;
/* Don't send event when device is enabled during qemu machine creation:
* it is present on boot, no hotplug event is necessary. We do send an
* event when the device is disabled later. */
if (!dev->hotplugged) {
+ /*
+ * Overwrite the default hotplug handler with the ACPI PCI one
+ * for cold plugged bridges only.
+ */
+ if (!s->legacy_piix &&
+ object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
+ PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+
+ qbus_set_hotplug_handler(BUS(sec), DEVICE(hotplug_dev),
+ &error_abort);
+ /* We don't have to overwrite any other hotplug handler yet */
+ assert(QLIST_EMPTY(&sec->child));
+ }
+
return;
}
+ bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
+ g_assert(bsel >= 0);
s->acpi_pcihp_pci_status[bsel].up |= (1U << slot);
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
}
@@ -243,6 +269,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
DeviceState *dev, Error **errp)
{
+ object_unparent(OBJECT(dev));
+}
+
+void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+ AcpiPciHpState *s, DeviceState *dev,
+ Error **errp)
+{
PCIDevice *pdev = PCI_DEVICE(dev);
int slot = PCI_SLOT(pdev->devfn);
int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index e330f24c71..88f9a9ec09 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -173,6 +173,7 @@ static int vmstate_acpi_post_load(void *opaque, int version_id)
PIIX4PMState *s = opaque;
pm_io_space_update(s);
+ smbus_io_space_update(s);
return 0;
}
@@ -370,6 +371,18 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque)
acpi_pm1_evt_power_down(&s->ar);
}
+static void piix4_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
+ } else if (!object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
+ !object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ error_setg(errp, "acpi: device pre plug request for not supported"
+ " device type: %s", object_get_typename(OBJECT(dev)));
+ }
+}
+
static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -392,8 +405,7 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
}
} else {
- error_setg(errp, "acpi: device plug request for not supported device"
- " type: %s", object_get_typename(OBJECT(dev)));
+ g_assert_not_reached();
}
}
@@ -407,8 +419,8 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev,
acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug,
dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
- acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
- errp);
+ acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->acpi_pci_hotplug,
+ dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) &&
!s->cpu_hotplug_legacy) {
acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
@@ -426,6 +438,9 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
if (s->acpi_memory_hotplug.is_enabled &&
object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
acpi_memory_unplug_cb(&s->acpi_memory_hotplug, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
+ errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) &&
!s->cpu_hotplug_legacy) {
acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
@@ -435,15 +450,6 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
}
}
-static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
-{
- PIIX4PMState *s = opaque;
-
- /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
- * and it's not hot-unpluggable */
- qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
-}
-
static void piix4_pm_machine_ready(Notifier *n, void *opaque)
{
PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready);
@@ -457,12 +463,6 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque)
pci_conf[0x63] = 0x60;
pci_conf[0x67] = (memory_region_present(io_as, 0x3f8) ? 0x08 : 0) |
(memory_region_present(io_as, 0x2f8) ? 0x90 : 0);
-
- if (s->use_acpi_pci_hotplug) {
- pci_for_each_bus(pci_get_bus(d), piix4_update_bus_hotplug, s);
- } else {
- piix4_update_bus_hotplug(pci_get_bus(d), s);
- }
}
static void piix4_pm_add_propeties(PIIX4PMState *s)
@@ -536,6 +536,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
pci_get_bus(dev), s);
+ qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), DEVICE(s), &error_abort);
piix4_pm_add_propeties(s);
}
@@ -702,6 +703,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
*/
dc->user_creatable = false;
dc->hotpluggable = false;
+ hc->pre_plug = piix4_device_pre_plug_cb;
hc->plug = piix4_device_plug_cb;
hc->unplug_request = piix4_device_unplug_request_cb;
hc->unplug = piix4_device_unplug_cb;
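Together with the pcihp.c hunks above, ACPI PCI hotplug on PIIX4 now runs through the generic HotplugHandler callbacks end to end. A rough sketch of the resulting call flow (summary only, not part of the patch):

/*
 * device_add:  hotplug_handler_pre_plug()       -> piix4_device_pre_plug_cb()
 *                -> acpi_pcihp_device_pre_plug_cb()        (bsel sanity check)
 *              hotplug_handler_plug()           -> piix4_device_plug_cb()
 *                -> acpi_pcihp_device_plug_cb()            (SCI only if hotplugged)
 * device_del:  hotplug_handler_unplug_request() -> piix4_device_unplug_request_cb()
 *                -> acpi_pcihp_device_unplug_request_cb()  (mark slot down, raise SCI)
 * guest eject: acpi_pcihp_eject_slot() -> hotplug_handler_unplug()
 *                -> piix4_device_unplug_cb() -> acpi_pcihp_device_unplug_cb()
 *                -> object_unparent()
 */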
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 5785fb697c..95fad6f0ce 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -366,36 +366,6 @@ static void acpi_dsdt_add_power_button(Aml *scope)
aml_append(scope, dev);
}
-/* RSDP */
-static GArray *
-build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset)
-{
- AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);
- unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address);
- unsigned xsdt_pa_offset =
- (char *)&rsdp->xsdt_physical_address - rsdp_table->data;
-
- bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16,
- true /* fseg memory */);
-
- memcpy(&rsdp->signature, "RSD PTR ", sizeof(rsdp->signature));
- memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, sizeof(rsdp->oem_id));
- rsdp->length = cpu_to_le32(sizeof(*rsdp));
- rsdp->revision = 0x02;
-
- /* Address to be filled by Guest linker */
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size,
- ACPI_BUILD_TABLE_FILE, xsdt_tbl_offset);
-
- /* Checksum to be filled by Guest linker */
- bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
- (char *)rsdp - rsdp_table->data, sizeof *rsdp,
- (char *)&rsdp->checksum - rsdp_table->data);
-
- return rsdp_table;
-}
-
static void
build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
@@ -854,7 +824,15 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL);
/* RSDP is in FSEG memory, so allocate it separately */
- build_rsdp(tables->rsdp, tables->linker, xsdt);
+ {
+ AcpiRsdpData rsdp_data = {
+ .revision = 2,
+ .oem_id = ACPI_BUILD_APPNAME6,
+ .xsdt_tbl_offset = &xsdt,
+ .rsdt_tbl_offset = NULL,
+ };
+ build_rsdp(tables->rsdp, tables->linker, &rsdp_data);
+ }
/* Cleanup memory that's no longer used. */
g_array_free(table_offsets, true);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5b678237b7..c2641e56ea 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -55,7 +55,7 @@
#include "hw/intc/arm_gic.h"
#include "hw/intc/arm_gicv3_common.h"
#include "kvm_arm.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "qapi/visitor.h"
#include "standard-headers/linux/input.h"
#include "hw/arm/smmuv3.h"
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c51423b647..4439ea663f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -653,8 +653,10 @@ static void machine_class_base_init(ObjectClass *oc, void *data)
static void machine_initfn(Object *obj)
{
MachineState *ms = MACHINE(obj);
+ MachineClass *mc = MACHINE_GET_CLASS(obj);
ms->kernel_irqchip_allowed = true;
+ ms->kernel_irqchip_split = mc->default_kernel_irqchip_split;
ms->kvm_shadow_mem = -1;
ms->dump_guest_core = true;
ms->mem_merge = true;
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index bd84c4ea4c..943dc2654b 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1297,3 +1297,179 @@ const PropertyInfo qdev_prop_off_auto_pcibar = {
.set = set_enum,
.set_default_value = set_default_value_enum,
};
+
+/* --- PCIELinkSpeed 2_5/5/8/16 -- */
+
+static void get_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+ int speed;
+
+ switch (*p) {
+ case QEMU_PCI_EXP_LNK_2_5GT:
+ speed = PCIE_LINK_SPEED_2_5;
+ break;
+ case QEMU_PCI_EXP_LNK_5GT:
+ speed = PCIE_LINK_SPEED_5;
+ break;
+ case QEMU_PCI_EXP_LNK_8GT:
+ speed = PCIE_LINK_SPEED_8;
+ break;
+ case QEMU_PCI_EXP_LNK_16GT:
+ speed = PCIE_LINK_SPEED_16;
+ break;
+ default:
+ /* Unreachable */
+ abort();
+ }
+
+ visit_type_enum(v, prop->name, &speed, prop->info->enum_table, errp);
+}
+
+static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+ int speed;
+ Error *local_err = NULL;
+
+ if (dev->realized) {
+ qdev_prop_set_after_realize(dev, name, errp);
+ return;
+ }
+
+ visit_type_enum(v, prop->name, &speed, prop->info->enum_table, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ switch (speed) {
+ case PCIE_LINK_SPEED_2_5:
+ *p = QEMU_PCI_EXP_LNK_2_5GT;
+ break;
+ case PCIE_LINK_SPEED_5:
+ *p = QEMU_PCI_EXP_LNK_5GT;
+ break;
+ case PCIE_LINK_SPEED_8:
+ *p = QEMU_PCI_EXP_LNK_8GT;
+ break;
+ case PCIE_LINK_SPEED_16:
+ *p = QEMU_PCI_EXP_LNK_16GT;
+ break;
+ default:
+ /* Unreachable */
+ abort();
+ }
+}
+
+const PropertyInfo qdev_prop_pcie_link_speed = {
+ .name = "PCIELinkSpeed",
+ .description = "2_5/5/8/16",
+ .enum_table = &PCIELinkSpeed_lookup,
+ .get = get_prop_pcielinkspeed,
+ .set = set_prop_pcielinkspeed,
+ .set_default_value = set_default_value_enum,
+};
+
+/* --- PCIELinkWidth 1/2/4/8/12/16/32 -- */
+
+static void get_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+ int width;
+
+ switch (*p) {
+ case QEMU_PCI_EXP_LNK_X1:
+ width = PCIE_LINK_WIDTH_1;
+ break;
+ case QEMU_PCI_EXP_LNK_X2:
+ width = PCIE_LINK_WIDTH_2;
+ break;
+ case QEMU_PCI_EXP_LNK_X4:
+ width = PCIE_LINK_WIDTH_4;
+ break;
+ case QEMU_PCI_EXP_LNK_X8:
+ width = PCIE_LINK_WIDTH_8;
+ break;
+ case QEMU_PCI_EXP_LNK_X12:
+ width = PCIE_LINK_WIDTH_12;
+ break;
+ case QEMU_PCI_EXP_LNK_X16:
+ width = PCIE_LINK_WIDTH_16;
+ break;
+ case QEMU_PCI_EXP_LNK_X32:
+ width = PCIE_LINK_WIDTH_32;
+ break;
+ default:
+ /* Unreachable */
+ abort();
+ }
+
+ visit_type_enum(v, prop->name, &width, prop->info->enum_table, errp);
+}
+
+static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+ int width;
+ Error *local_err = NULL;
+
+ if (dev->realized) {
+ qdev_prop_set_after_realize(dev, name, errp);
+ return;
+ }
+
+ visit_type_enum(v, prop->name, &width, prop->info->enum_table, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ switch (width) {
+ case PCIE_LINK_WIDTH_1:
+ *p = QEMU_PCI_EXP_LNK_X1;
+ break;
+ case PCIE_LINK_WIDTH_2:
+ *p = QEMU_PCI_EXP_LNK_X2;
+ break;
+ case PCIE_LINK_WIDTH_4:
+ *p = QEMU_PCI_EXP_LNK_X4;
+ break;
+ case PCIE_LINK_WIDTH_8:
+ *p = QEMU_PCI_EXP_LNK_X8;
+ break;
+ case PCIE_LINK_WIDTH_12:
+ *p = QEMU_PCI_EXP_LNK_X12;
+ break;
+ case PCIE_LINK_WIDTH_16:
+ *p = QEMU_PCI_EXP_LNK_X16;
+ break;
+ case PCIE_LINK_WIDTH_32:
+ *p = QEMU_PCI_EXP_LNK_X32;
+ break;
+ default:
+ /* Unreachable */
+ abort();
+ }
+}
+
+const PropertyInfo qdev_prop_pcie_link_width = {
+ .name = "PCIELinkWidth",
+ .description = "1/2/4/8/12/16/32",
+ .enum_table = &PCIELinkWidth_lookup,
+ .get = get_prop_pcielinkwidth,
+ .set = set_prop_pcielinkwidth,
+ .set_default_value = set_default_value_enum,
+};
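The two PropertyInfo entries above are meant to be consumed via DEFINE_PROP_* wrappers on the include/ side of this series (not in this hw/-only diff). A hypothetical sketch of how a device would wire them up — the device and property names here are invented for illustration:

/* Hypothetical sketch; "MyPCIERootPortState", "x-speed" and "x-width" are illustrative */
static Property my_pcie_root_port_props[] = {
    DEFINE_PROP_SIGNED("x-speed", MyPCIERootPortState, speed,
                       QEMU_PCI_EXP_LNK_8GT,
                       qdev_prop_pcie_link_speed, PCIExpLinkSpeed),
    DEFINE_PROP_SIGNED("x-width", MyPCIERootPortState, width,
                       QEMU_PCI_EXP_LNK_X16,
                       qdev_prop_pcie_link_width, PCIExpLinkWidth),
    DEFINE_PROP_END_OF_LIST(),
};
/* which would accept e.g.:  -device my-pcie-root-port,x-speed=8,x-width=16 */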
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index cece4aa495..faf76a8bc4 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj)
TYPE_VIRTIO_GPU);
}
-static const TypeInfo virtio_gpu_pci_info = {
- .name = TYPE_VIRTIO_GPU_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = {
+ .generic_name = TYPE_VIRTIO_GPU_PCI,
.instance_size = sizeof(VirtIOGPUPCI),
.instance_init = virtio_gpu_initfn,
.class_init = virtio_gpu_pci_class_init,
@@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = {
static void virtio_gpu_pci_register_types(void)
{
- type_register_static(&virtio_gpu_pci_info);
+ virtio_pci_types_register(&virtio_gpu_pci_info);
}
type_init(virtio_gpu_pci_register_types)
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index ab2e369b28..8db4d916f2 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj)
TYPE_VIRTIO_GPU);
}
-static TypeInfo virtio_vga_info = {
- .name = TYPE_VIRTIO_VGA,
- .parent = TYPE_VIRTIO_PCI,
+static VirtioPCIDeviceTypeInfo virtio_vga_info = {
+ .generic_name = TYPE_VIRTIO_VGA,
.instance_size = sizeof(struct VirtIOVGA),
.instance_init = virtio_vga_inst_initfn,
.class_init = virtio_vga_class_init,
@@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = {
static void virtio_vga_register_types(void)
{
- type_register_static(&virtio_vga_info);
+ virtio_pci_types_register(&virtio_vga_info);
}
type_init(virtio_vga_register_types)
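Both display conversions rely on the VirtioPCIDeviceTypeInfo/virtio_pci_types_register() helpers introduced by the hw/virtio/virtio-pci.[ch] part of this series (not shown in this hunk). The rough idea, as an assumption-flagged sketch:

/*
 * Sketch (assumption, based on the virtio-pci side of the series):
 *
 *   static const VirtioPCIDeviceTypeInfo info = {
 *       .generic_name = TYPE_VIRTIO_VGA,
 *       ...
 *   };
 *   virtio_pci_types_register(&info);
 *
 * registers one QOM type per name set in the info; devices that only fill in
 * .generic_name, like virtio-gpu-pci and virtio-vga here, keep a single
 * user-visible type, while others may additionally register transitional and
 * non-transitional PCI variants.
 */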
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 236a20eaa8..14f757fc36 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2426,7 +2426,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
assert(iommu);
- if (iommu->intr_supported) {
+ if (x86_iommu_ir_supported(iommu)) {
dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */
}
@@ -2499,7 +2499,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
* When interrupt remapping is supported, we add a special IVHD device
* for type IO-APIC.
*/
- if (x86_iommu_get_default()->intr_supported) {
+ if (x86_iommu_ir_supported(x86_iommu_get_default())) {
ivhd_table_len += 8;
}
/* IVHD length */
@@ -2535,7 +2535,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
* Linux IOMMU driver checks for the special IVHD device (type IO-APIC).
* See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059'
*/
- if (x86_iommu_get_default()->intr_supported) {
+ if (x86_iommu_ir_supported(x86_iommu_get_default())) {
build_append_int_noprefix(table_data,
(0x1ull << 56) | /* type IOAPIC */
(IOAPIC_SB_DEVID << 40) | /* IOAPIC devid */
@@ -2547,32 +2547,6 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker)
"IVRS", table_data->len - iommu_start, 1, NULL, NULL);
}
-static GArray *
-build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
-{
- AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);
- unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address);
- unsigned rsdt_pa_offset =
- (char *)&rsdp->rsdt_physical_address - rsdp_table->data;
-
- bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16,
- true /* fseg memory */);
-
- memcpy(&rsdp->signature, "RSD PTR ", 8);
- memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6);
- /* Address to be filled by Guest linker */
- bios_linker_loader_add_pointer(linker,
- ACPI_BUILD_RSDP_FILE, rsdt_pa_offset, rsdt_pa_size,
- ACPI_BUILD_TABLE_FILE, rsdt_tbl_offset);
-
- /* Checksum to be filled by Guest linker */
- bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE,
- (char *)rsdp - rsdp_table->data, sizeof *rsdp,
- (char *)&rsdp->checksum - rsdp_table->data);
-
- return rsdp_table;
-}
-
typedef
struct AcpiBuildState {
/* Copy of table in RAM (for patching). */
@@ -2729,7 +2703,25 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
slic_oem.id, slic_oem.table_id);
/* RSDP is in FSEG memory, so allocate it separately */
- build_rsdp(tables->rsdp, tables->linker, rsdt);
+ {
+ AcpiRsdpData rsdp_data = {
+ .revision = 0,
+ .oem_id = ACPI_BUILD_APPNAME6,
+ .xsdt_tbl_offset = NULL,
+ .rsdt_tbl_offset = &rsdt,
+ };
+ build_rsdp(tables->rsdp, tables->linker, &rsdp_data);
+ if (!pcmc->rsdp_in_ram) {
+ /* We used to allocate some extra space for RSDP revision 2 but
+ * only used the RSDP revision 0 space. The extra bytes were
+ * zeroed out and not used.
+ * Here we continue wasting those extra 16 bytes to make sure we
+ * don't break migration for machine types 2.2 and older due to
+ * RSDP blob size mismatch.
+ */
+ build_append_int_noprefix(tables->rsdp, 0, 16);
+ }
+ }
/* We'll expose it all to Guest so we want to reduce
* chance of size changes.
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 353a810e6b..8ad707aba0 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1233,7 +1233,7 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
}
/* validate that we are configured with intremap=on */
- if (!X86_IOMMU_DEVICE(iommu)->intr_supported) {
+ if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
trace_amdvi_err("Interrupt remapping is enabled in the guest but "
"not in the host. Use intremap=on to enable interrupt "
"remapping in amd-iommu.");
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d97bcbc2f7..8b72735650 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -524,7 +524,6 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
addr = s->root + index * sizeof(*re);
if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
- trace_vtd_re_invalid(re->rsvd, re->val);
re->val = 0;
return -VTD_FR_ROOT_TABLE_INV;
}
@@ -545,7 +544,6 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
/* we have checked that root entry is present */
addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
- trace_vtd_re_invalid(root->rsvd, root->val);
return -VTD_FR_CONTEXT_TABLE_INV;
}
ce->lo = le64_to_cpu(ce->lo);
@@ -630,16 +628,20 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
break;
case VTD_CONTEXT_TT_DEV_IOTLB:
if (!x86_iommu->dt_supported) {
+ error_report_once("%s: DT specified but not supported", __func__);
return false;
}
break;
case VTD_CONTEXT_TT_PASS_THROUGH:
if (!x86_iommu->pt_supported) {
+ error_report_once("%s: PT specified but not supported", __func__);
return false;
}
break;
default:
/* Unknown type */
+ error_report_once("%s: unknown ce type: %"PRIu32, __func__,
+ vtd_ce_get_type(ce));
return false;
}
return true;
@@ -1003,7 +1005,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
- trace_vtd_re_invalid(re.rsvd, re.val);
+ error_report_once("%s: invalid root entry: rsvd=0x%"PRIx64
+ ", val=0x%"PRIx64" (reserved nonzero)",
+ __func__, re.rsvd, re.val);
return -VTD_FR_ROOT_ENTRY_RSVD;
}
@@ -1020,19 +1024,23 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
- trace_vtd_ce_invalid(ce->hi, ce->lo);
+ error_report_once("%s: invalid context entry: hi=%"PRIx64
+ ", lo=%"PRIx64" (reserved nonzero)",
+ __func__, ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_RSVD;
}
/* Check if the programming of context-entry is valid */
if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
- trace_vtd_ce_invalid(ce->hi, ce->lo);
+ error_report_once("%s: invalid context entry: hi=%"PRIx64
+ ", lo=%"PRIx64" (level %d not supported)",
+ __func__, ce->hi, ce->lo, vtd_ce_get_level(ce));
return -VTD_FR_CONTEXT_ENTRY_INV;
}
/* Do translation type check */
if (!vtd_ce_type_check(x86_iommu, ce)) {
- trace_vtd_ce_invalid(ce->hi, ce->lo);
+ /* Errors dumped in vtd_ce_type_check() */
return -VTD_FR_CONTEXT_ENTRY_INV;
}
@@ -1878,7 +1886,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
(inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
- trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
+ " (reserved nonzero)", __func__, inv_desc->hi,
+ inv_desc->lo);
return false;
}
if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
@@ -1901,7 +1911,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
/* Interrupt flag */
vtd_generate_completion_event(s);
} else {
- trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
+ " (unknown type)", __func__, inv_desc->hi,
+ inv_desc->lo);
return false;
}
return true;
@@ -1913,7 +1925,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
uint16_t sid, fmask;
if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
- trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64
+ " (reserved nonzero)", __func__, inv_desc->hi,
+ inv_desc->lo);
return false;
}
switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
@@ -1932,7 +1946,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
break;
default:
- trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64
+ " (invalid type)", __func__, inv_desc->hi,
+ inv_desc->lo);
return false;
}
return true;
@@ -1946,7 +1962,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
(inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
- trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64" (reserved bits unzero)\n",
+ __func__, inv_desc->hi, inv_desc->lo);
return false;
}
@@ -1965,14 +1983,20 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
if (am > VTD_MAMV) {
- trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64" (am=%u > VTD_MAMV=%u)\n",
+ __func__, inv_desc->hi, inv_desc->lo,
+ am, (unsigned)VTD_MAMV);
return false;
}
vtd_iotlb_page_invalidate(s, domain_id, addr, am);
break;
default:
- trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64" (type mismatch: 0x%llx)\n",
+ __func__, inv_desc->hi, inv_desc->lo,
+ inv_desc->lo & VTD_INV_DESC_IOTLB_G);
return false;
}
return true;
@@ -2012,7 +2036,9 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
(inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
- trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
+ error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64
+ ", lo=%"PRIx64" (reserved nonzero)", __func__,
+ inv_desc->hi, inv_desc->lo);
return false;
}
@@ -2103,7 +2129,9 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
break;
default:
- trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
+ error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
+ " (unknown type)", __func__, inv_desc.hi,
+ inv_desc.lo);
return false;
}
s->iq_head++;
@@ -2540,7 +2568,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
__func__, pci_bus_num(vtd_as->bus),
VTD_PCI_SLOT(vtd_as->devfn),
VTD_PCI_FUNC(vtd_as->devfn),
- iotlb.iova);
+ addr);
}
return iotlb;
@@ -2628,9 +2656,10 @@ static Property vtd_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
- DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
+ DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
+ DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -3119,6 +3148,9 @@ static void vtd_init(IntelIOMMUState *s)
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+ if (s->dma_drain) {
+ s->cap |= VTD_CAP_DRAIN;
+ }
if (s->aw_bits == VTD_HOST_AW_48BIT) {
s->cap |= VTD_CAP_SAGAW_48bit;
}
@@ -3137,7 +3169,7 @@ static void vtd_init(IntelIOMMUState *s)
vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);
- if (x86_iommu->intr_supported) {
+ if (x86_iommu_ir_supported(x86_iommu)) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
s->ecap |= VTD_ECAP_EIM;
@@ -3238,14 +3270,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
+ if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu_ir_supported(x86_iommu)) {
error_setg(errp, "eim=on cannot be selected without intremap=on");
return false;
}
if (s->intr_eim == ON_OFF_AUTO_AUTO) {
s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
- && x86_iommu->intr_supported ?
+ && x86_iommu_ir_supported(x86_iommu) ?
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d084099ed9..00e9edbc66 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -203,6 +203,9 @@
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
+#define VTD_CAP_DRAIN_READ (1ULL << 55)
+#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
/* Supported Adjusted Guest Address Widths */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 115bc2825c..f248662e97 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -37,7 +37,7 @@
#include "hw/pci/pci_bus.h"
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "hw/loader.h"
#include "elf.h"
#include "multiboot.h"
@@ -1244,7 +1244,7 @@ void pc_machine_done(Notifier *notifier, void *data)
if (pcms->apic_id_limit > 255 && !xen_enabled()) {
IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
- if (!iommu || !iommu->x86_iommu.intr_supported ||
+ if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) ||
iommu->intr_eim != ON_OFF_AUTO_ON) {
error_report("current -smp configuration requires "
"Extended Interrupt Mode enabled. "
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6981cfa740..7f1cb527b5 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -30,7 +30,7 @@
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "hw/display/ramfb.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_ids.h"
#include "hw/usb.h"
@@ -368,7 +368,7 @@ static void pc_compat_1_2(MachineState *machine)
x86_cpu_change_kvm_default("kvm-pv-eoi", NULL);
}
-/* PC compat function for pc-0.10 to pc-0.13 */
+/* PC compat function for pc-0.12 and pc-0.13 */
static void pc_compat_0_13(MachineState *machine)
{
pc_compat_1_2(machine);
@@ -834,6 +834,7 @@ static void pc_i440fx_0_15_machine_options(MachineClass *m)
{
pc_i440fx_1_0_machine_options(m);
m->hw_version = "0.15";
+ m->deprecation_reason = "use a newer machine type instead";
SET_MACHINE_COMPAT(m, PC_COMPAT_0_15);
}
@@ -951,73 +952,6 @@ static void pc_i440fx_0_12_machine_options(MachineClass *m)
DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13,
pc_i440fx_0_12_machine_options);
-
-#define PC_COMPAT_0_11 \
- PC_CPU_MODEL_IDS("0.11") \
- {\
- .driver = "virtio-blk-pci",\
- .property = "vectors",\
- .value = stringify(0),\
- },{\
- .driver = TYPE_PCI_DEVICE,\
- .property = "rombar",\
- .value = stringify(0),\
- },{\
- .driver = "ide-drive",\
- .property = "ver",\
- .value = "0.11",\
- },{\
- .driver = "scsi-disk",\
- .property = "ver",\
- .value = "0.11",\
- },
-
-static void pc_i440fx_0_11_machine_options(MachineClass *m)
-{
- pc_i440fx_0_12_machine_options(m);
- m->hw_version = "0.11";
- m->deprecation_reason = "use a newer machine type instead";
- SET_MACHINE_COMPAT(m, PC_COMPAT_0_11);
-}
-
-DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13,
- pc_i440fx_0_11_machine_options);
-
-
-#define PC_COMPAT_0_10 \
- PC_CPU_MODEL_IDS("0.10") \
- {\
- .driver = "virtio-blk-pci",\
- .property = "class",\
- .value = stringify(PCI_CLASS_STORAGE_OTHER),\
- },{\
- .driver = "virtio-serial-pci",\
- .property = "class",\
- .value = stringify(PCI_CLASS_DISPLAY_OTHER),\
- },{\
- .driver = "virtio-net-pci",\
- .property = "vectors",\
- .value = stringify(0),\
- },{\
- .driver = "ide-drive",\
- .property = "ver",\
- .value = "0.10",\
- },{\
- .driver = "scsi-disk",\
- .property = "ver",\
- .value = "0.10",\
- },
-
-static void pc_i440fx_0_10_machine_options(MachineClass *m)
-{
- pc_i440fx_0_11_machine_options(m);
- m->hw_version = "0.10";
- SET_MACHINE_COMPAT(m, PC_COMPAT_0_10);
-}
-
-DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13,
- pc_i440fx_0_10_machine_options);
-
typedef struct {
uint16_t gpu_device_id;
uint16_t pch_device_id;
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 58459bdab5..0a3f3f18e4 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -47,7 +47,7 @@
#include "hw/i386/amd_iommu.h"
#include "hw/i386/intel_iommu.h"
#include "hw/display/ramfb.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "hw/ide/pci.h"
#include "hw/ide/ahci.h"
#include "hw/usb.h"
@@ -304,6 +304,7 @@ static void pc_q35_machine_options(MachineClass *m)
m->units_per_default_bus = 1;
m->default_machine_opts = "firmware=bios-256k.bin";
m->default_display = "std";
+ m->default_kernel_irqchip_split = true;
m->no_floppy = 1;
machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
@@ -323,6 +324,7 @@ DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL,
static void pc_q35_3_1_machine_options(MachineClass *m)
{
pc_q35_4_0_machine_options(m);
+ m->default_kernel_irqchip_split = false;
m->alias = NULL;
SET_MACHINE_COMPAT(m, PC_COMPAT_3_1);
}
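In practice this means pc-q35-4.0 defaults to the split irqchip under KVM, while pc-q35-3.1 and older keep the previous full in-kernel default; either can still be overridden on the command line, for example (illustrative invocations):

qemu-system-x86_64 -machine pc-q35-4.0                    # split irqchip by default (KVM)
qemu-system-x86_64 -machine pc-q35-4.0,kernel-irqchip=on  # restore the full in-kernel irqchip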
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 6ac347d18c..77244fc384 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -5,19 +5,15 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC inv
# hw/i386/intel_iommu.c
vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
-vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
vtd_inv_desc_cc_global(void) "context invalidate globally"
vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16
-vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
-vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
vtd_inv_desc_wait_irq(const char *msg) "%s"
-vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32
vtd_inv_qi_enable(bool enable) "enabled %d"
@@ -27,9 +23,7 @@ vtd_inv_qi_tail(uint16_t head) "write tail %d"
vtd_inv_qi_fetch(void) ""
vtd_context_cache_reset(void) ""
vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
-vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
-vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index abc3c03158..d1534c1ae0 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -112,6 +112,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
PCMachineState *pcms =
PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
QLIST_INIT(&x86_iommu->iec_notifiers);
+ bool irq_all_kernel = kvm_irqchip_in_kernel() && !kvm_irqchip_is_split();
if (!pcms || !pcms->bus) {
error_setg(errp, "Machine-type '%s' not supported by IOMMU",
@@ -119,9 +120,14 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
return;
}
+ /* If the user didn't specify IR, choose a default value for it */
+ if (x86_iommu->intr_supported == ON_OFF_AUTO_AUTO) {
+ x86_iommu->intr_supported = irq_all_kernel ?
+ ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
+ }
+
/* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */
- if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
- !kvm_irqchip_is_split()) {
+ if (x86_iommu_ir_supported(x86_iommu) && irq_all_kernel) {
error_setg(errp, "Interrupt Remapping cannot work with "
"kernel-irqchip=on, please use 'split|off'.");
return;
@@ -135,7 +141,8 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
}
static Property x86_iommu_properties[] = {
- DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
+ DEFINE_PROP_ON_OFF_AUTO("intremap", X86IOMMUState,
+ intr_supported, ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
DEFINE_PROP_END_OF_LIST(),
@@ -148,6 +155,11 @@ static void x86_iommu_class_init(ObjectClass *klass, void *data)
dc->props = x86_iommu_properties;
}
+bool x86_iommu_ir_supported(X86IOMMUState *s)
+{
+ return s->intr_supported == ON_OFF_AUTO_ON;
+}
+
static const TypeInfo x86_iommu_info = {
.name = TYPE_X86_IOMMU_DEVICE,
.parent = TYPE_SYS_BUS_DEVICE,
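With intremap converted to an on/off/auto tri-state defaulting to auto, interrupt remapping now switches itself on whenever the irqchip configuration permits it. Illustrative command lines (a KVM q35 setup is assumed):

-device intel-iommu                      # intremap=auto: IR enabled unless kernel-irqchip=on
-device intel-iommu,intremap=on          # unchanged: still rejected with kernel-irqchip=on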
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 0e9963f5ee..301a8e972d 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -37,6 +37,8 @@ obj-$(CONFIG_SH4) += sh_intc.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_XIVE) += xive.o
+obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
obj-$(CONFIG_POWERNV) += xics_pnv.o
obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
new file mode 100644
index 0000000000..0e39c90cbd
--- /dev/null
+++ b/hw/intc/spapr_xive.c
@@ -0,0 +1,1486 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Virtualization Controller BAR and Thread Management BAR that we
+ * use for the ESB pages and the TIMA pages
+ */
+#define SPAPR_XIVE_VC_BASE 0x0006010000000000ull
+#define SPAPR_XIVE_TM_BASE 0x0006030203180000ull
+
+/*
+ * The allocation of VP blocks is a complex operation in OPAL and the
+ * VP identifiers have a relation with the number of HW chips, the
+ * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
+ * controller model does not have the same constraints and can use a
+ * simple mapping scheme of the CPU vcpu_id
+ *
+ * These identifiers are never returned to the OS.
+ */
+
+#define SPAPR_XIVE_NVT_BASE 0x400
+
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
+/*
+ * sPAPR NVT and END indexing helpers
+ */
+static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
+{
+ return nvt_idx - SPAPR_XIVE_NVT_BASE;
+}
+
+static void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
+ uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+ assert(cpu);
+
+ if (out_nvt_blk) {
+ *out_nvt_blk = SPAPR_XIVE_BLOCK_ID;
+ }
+
+ if (out_nvt_idx) {
+ *out_nvt_idx = SPAPR_XIVE_NVT_BASE + cpu->vcpu_id;
+ }
+}
+
+static int spapr_xive_target_to_nvt(uint32_t target,
+ uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+ PowerPCCPU *cpu = spapr_find_cpu(target);
+
+ if (!cpu) {
+ return -1;
+ }
+
+ spapr_xive_cpu_to_nvt(cpu, out_nvt_blk, out_nvt_idx);
+ return 0;
+}
+
+/*
+ * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
+ * priorities per CPU
+ */
+static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
+ uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+ assert(cpu);
+
+ if (out_end_blk) {
+ *out_end_blk = SPAPR_XIVE_BLOCK_ID;
+ }
+
+ if (out_end_idx) {
+ *out_end_idx = (cpu->vcpu_id << 3) + prio;
+ }
+}
+
+static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
+ uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+ PowerPCCPU *cpu = spapr_find_cpu(target);
+
+ if (!cpu) {
+ return -1;
+ }
+
+ spapr_xive_cpu_to_end(cpu, prio, out_end_blk, out_end_idx);
+ return 0;
+}
+
+/*
+ * On sPAPR machines, use a simplified output for the XIVE END
+ * structure dumping only the information related to the OS EQ.
+ */
+static void spapr_xive_end_pic_print_info(sPAPRXive *xive, XiveEND *end,
+ Monitor *mon)
+{
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qentries = 1 << (qsize + 10);
+ uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+ monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+ spapr_xive_nvt_to_target(0, nvt),
+ priority, qindex, qentries, qgen);
+
+ xive_end_queue_pic_print_info(end, 6, mon);
+ monitor_printf(mon, "]");
+}
+
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
+{
+ XiveSource *xsrc = &xive->source;
+ int i;
+
+ monitor_printf(mon, " LSIN PQ EISN CPU/PRIO EQ\n");
+
+ for (i = 0; i < xive->nr_irqs; i++) {
+ uint8_t pq = xive_source_esb_get(xsrc, i);
+ XiveEAS *eas = &xive->eat[i];
+
+ if (!xive_eas_is_valid(eas)) {
+ continue;
+ }
+
+ monitor_printf(mon, " %08x %s %c%c%c %s %08x ", i,
+ xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+ pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ',
+ xive_eas_is_masked(eas) ? "M" : " ",
+ (int) xive_get_field64(EAS_END_DATA, eas->w));
+
+ if (!xive_eas_is_masked(eas)) {
+ uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ XiveEND *end;
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ if (xive_end_is_valid(end)) {
+ spapr_xive_end_pic_print_info(xive, end, mon);
+ }
+ }
+ monitor_printf(mon, "\n");
+ }
+}
+
+static void spapr_xive_map_mmio(sPAPRXive *xive)
+{
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 2, xive->tm_base);
+}
+
+/*
+ * When a Virtual Processor is scheduled to run on a HW thread, the
+ * hypervisor pushes its identifier in the OS CAM line. Emulate the
+ * same behavior under QEMU.
+ */
+void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx)
+{
+ uint8_t nvt_blk;
+ uint32_t nvt_idx;
+ uint32_t nvt_cam;
+
+ spapr_xive_cpu_to_nvt(POWERPC_CPU(tctx->cs), &nvt_blk, &nvt_idx);
+
+ nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_nvt_cam_line(nvt_blk, nvt_idx));
+ memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
+}
+
+static void spapr_xive_end_reset(XiveEND *end)
+{
+ memset(end, 0, sizeof(*end));
+
+ /* switch off the escalation and notification ESBs */
+ end->w1 = cpu_to_be32(END_W1_ESe_Q | END_W1_ESn_Q);
+}
+
+static void spapr_xive_reset(void *dev)
+{
+ sPAPRXive *xive = SPAPR_XIVE(dev);
+ int i;
+
+ /*
+ * The XiveSource has its own reset handler, which mask off all
+ * IRQs (!P|Q)
+ */
+
+ /* Mask all valid EASs in the IRQ number space. */
+ for (i = 0; i < xive->nr_irqs; i++) {
+ XiveEAS *eas = &xive->eat[i];
+ if (xive_eas_is_valid(eas)) {
+ eas->w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+ } else {
+ eas->w = 0;
+ }
+ }
+
+ /* Clear all ENDs */
+ for (i = 0; i < xive->nr_ends; i++) {
+ spapr_xive_end_reset(&xive->endt[i]);
+ }
+}
+
+static void spapr_xive_instance_init(Object *obj)
+{
+ sPAPRXive *xive = SPAPR_XIVE(obj);
+
+ object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE);
+ object_property_add_child(obj, "source", OBJECT(&xive->source), NULL);
+
+ object_initialize(&xive->end_source, sizeof(xive->end_source),
+ TYPE_XIVE_END_SOURCE);
+ object_property_add_child(obj, "end_source", OBJECT(&xive->end_source),
+ NULL);
+}
+
+static void spapr_xive_realize(DeviceState *dev, Error **errp)
+{
+ sPAPRXive *xive = SPAPR_XIVE(dev);
+ XiveSource *xsrc = &xive->source;
+ XiveENDSource *end_xsrc = &xive->end_source;
+ Error *local_err = NULL;
+
+ if (!xive->nr_irqs) {
+ error_setg(errp, "Number of interrupt needs to be greater 0");
+ return;
+ }
+
+ if (!xive->nr_ends) {
+ error_setg(errp, "Number of interrupt needs to be greater 0");
+ return;
+ }
+
+ /*
+ * Initialize the internal sources, for IPIs and virtual devices.
+ */
+ object_property_set_int(OBJECT(xsrc), xive->nr_irqs, "nr-irqs",
+ &error_fatal);
+ object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(xive),
+ &error_fatal);
+ object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /*
+ * Initialize the END ESB source
+ */
+ object_property_set_int(OBJECT(end_xsrc), xive->nr_irqs, "nr-ends",
+ &error_fatal);
+ object_property_add_const_link(OBJECT(end_xsrc), "xive", OBJECT(xive),
+ &error_fatal);
+ object_property_set_bool(OBJECT(end_xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Set the mapping address of the END ESB pages after the source ESBs */
+ xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+ /*
+ * Allocate the routing tables
+ */
+ xive->eat = g_new0(XiveEAS, xive->nr_irqs);
+ xive->endt = g_new0(XiveEND, xive->nr_ends);
+
+ /* TIMA initialization */
+ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+ "xive.tima", 4ull << TM_SHIFT);
+
+ /* Define all XIVE MMIO regions on SysBus */
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+
+ /* Map all regions */
+ spapr_xive_map_mmio(xive);
+
+ qemu_register_reset(spapr_xive_reset, dev);
+}
+
+static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
+ uint32_t eas_idx, XiveEAS *eas)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (eas_idx >= xive->nr_irqs) {
+ return -1;
+ }
+
+ *eas = xive->eat[eas_idx];
+ return 0;
+}
+
+static int spapr_xive_get_end(XiveRouter *xrtr,
+ uint8_t end_blk, uint32_t end_idx, XiveEND *end)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (end_idx >= xive->nr_ends) {
+ return -1;
+ }
+
+ memcpy(end, &xive->endt[end_idx], sizeof(XiveEND));
+ return 0;
+}
+
+static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
+ uint32_t end_idx, XiveEND *end,
+ uint8_t word_number)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (end_idx >= xive->nr_ends) {
+ return -1;
+ }
+
+ memcpy(&xive->endt[end_idx], end, sizeof(XiveEND));
+ return 0;
+}
+
+static int spapr_xive_get_nvt(XiveRouter *xrtr,
+ uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
+{
+ uint32_t vcpu_id = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+ PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
+
+ if (!cpu) {
+ /* TODO: should we assert() if we can't find an NVT? */
+ return -1;
+ }
+
+ /*
+ * sPAPR does not maintain a NVT table. Return that the NVT is
+ * valid if we have found a matching CPU
+ */
+ nvt->w0 = cpu_to_be32(NVT_W0_VALID);
+ return 0;
+}
+
+static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk,
+ uint32_t nvt_idx, XiveNVT *nvt,
+ uint8_t word_number)
+{
+ /*
+ * We don't need to write back to the NVTs because the sPAPR
+ * machine should never hit a non-scheduled NVT. It should never
+ * get called.
+ */
+ g_assert_not_reached();
+}
+
+static const VMStateDescription vmstate_spapr_xive_end = {
+ .name = TYPE_SPAPR_XIVE "/end",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(w0, XiveEND),
+ VMSTATE_UINT32(w1, XiveEND),
+ VMSTATE_UINT32(w2, XiveEND),
+ VMSTATE_UINT32(w3, XiveEND),
+ VMSTATE_UINT32(w4, XiveEND),
+ VMSTATE_UINT32(w5, XiveEND),
+ VMSTATE_UINT32(w6, XiveEND),
+ VMSTATE_UINT32(w7, XiveEND),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr_xive_eas = {
+ .name = TYPE_SPAPR_XIVE "/eas",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT64(w, XiveEAS),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr_xive = {
+ .name = TYPE_SPAPR_XIVE,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
+ vmstate_spapr_xive_eas, XiveEAS),
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(endt, sPAPRXive, nr_ends,
+ vmstate_spapr_xive_end, XiveEND),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static Property spapr_xive_properties[] = {
+ DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
+ DEFINE_PROP_UINT32("nr-ends", sPAPRXive, nr_ends, 0),
+ DEFINE_PROP_UINT64("vc-base", sPAPRXive, vc_base, SPAPR_XIVE_VC_BASE),
+ DEFINE_PROP_UINT64("tm-base", sPAPRXive, tm_base, SPAPR_XIVE_TM_BASE),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_xive_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
+
+ dc->desc = "sPAPR XIVE Interrupt Controller";
+ dc->props = spapr_xive_properties;
+ dc->realize = spapr_xive_realize;
+ dc->vmsd = &vmstate_spapr_xive;
+
+ xrc->get_eas = spapr_xive_get_eas;
+ xrc->get_end = spapr_xive_get_end;
+ xrc->write_end = spapr_xive_write_end;
+ xrc->get_nvt = spapr_xive_get_nvt;
+ xrc->write_nvt = spapr_xive_write_nvt;
+}
+
+static const TypeInfo spapr_xive_info = {
+ .name = TYPE_SPAPR_XIVE,
+ .parent = TYPE_XIVE_ROUTER,
+ .instance_init = spapr_xive_instance_init,
+ .instance_size = sizeof(sPAPRXive),
+ .class_init = spapr_xive_class_init,
+};
+
+static void spapr_xive_register_types(void)
+{
+ type_register_static(&spapr_xive_info);
+}
+
+type_init(spapr_xive_register_types)
+
+bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return false;
+ }
+
+ xive->eat[lisn].w |= cpu_to_be64(EAS_VALID);
+ xive_source_irq_set(xsrc, lisn, lsi);
+ return true;
+}
+
+bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return false;
+ }
+
+ xive->eat[lisn].w &= cpu_to_be64(~EAS_VALID);
+ xive_source_irq_set(xsrc, lisn, false);
+ return true;
+}
+
+qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return NULL;
+ }
+
+ /* The sPAPR machine/device should have claimed the IRQ before */
+ assert(xive_eas_is_valid(&xive->eat[lisn]));
+
+ return xive_source_qirq(xsrc, lisn);
+}
+
+/*
+ * XIVE hcalls
+ *
+ * The terminology used by the XIVE hcalls is the following :
+ *
+ * TARGET vCPU number
+ * EQ Event Queue assigned by OS to receive event data
+ * ESB page for source interrupt management
+ * LISN Logical Interrupt Source Number identifying a source in the
+ * machine
+ * EISN Effective Interrupt Source Number used by guest OS to
+ * identify source in the guest
+ *
+ * The EAS, END, NVT structures are not exposed.
+ */
+
+/*
+ * Linux hosts under OPAL reserve priority 7 for their own escalation
+ * interrupts (DD2.X POWER9). So we only allow the guest to use
+ * priorities [0..6].
+ */
+static bool spapr_xive_priority_is_reserved(uint8_t priority)
+{
+ switch (priority) {
+ case 0 ... 6:
+ return false;
+ case 7: /* OPAL escalation queue */
+ default:
+ return true;
+ }
+}
+
+/*
+ * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical
+ * real address of the MMIO page through which the Event State Buffer
+ * entry associated with the value of the "lisn" parameter is managed.
+ *
+ * Parameters:
+ * Input
+ * - R4: "flags"
+ * Bits 0-63 reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as returned
+ * by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output
+ * - R4: "flags"
+ * Bits 0-59: Reserved
+ * Bit 60: H_INT_ESB must be used for Event State Buffer
+ * management
+ * Bit 61: 1 == LSI 0 == MSI
+ * Bit 62: the full function page supports trigger
+ * Bit 63: Store EOI Supported
+ * - R5: Logical Real address of full function Event State Buffer
+ * management page, -1 if H_INT_ESB hcall flag is set to 1.
+ * - R6: Logical Real Address of trigger only Event State Buffer
+ * management page or -1.
+ * - R7: Power of 2 page size for the ESB management pages returned in
+ * R5 and R6.
+ */
+
+#define SPAPR_XIVE_SRC_H_INT_ESB PPC_BIT(60) /* ESB manage with H_INT_ESB */
+#define SPAPR_XIVE_SRC_LSI PPC_BIT(61) /* Virtual LSI type */
+#define SPAPR_XIVE_SRC_TRIGGER PPC_BIT(62) /* Trigger and management
+ on same page */
+#define SPAPR_XIVE_SRC_STORE_EOI PPC_BIT(63) /* Store EOI support */
+
+static target_ulong h_int_get_source_info(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveSource *xsrc = &xive->source;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ if (!xive_eas_is_valid(&xive->eat[lisn])) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /*
+ * All sources are emulated under the main XIVE object and share
+ * the same characteristics.
+ */
+ args[0] = 0;
+ if (!xive_source_esb_has_2page(xsrc)) {
+ args[0] |= SPAPR_XIVE_SRC_TRIGGER;
+ }
+ if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) {
+ args[0] |= SPAPR_XIVE_SRC_STORE_EOI;
+ }
+
+ /*
+ * Force the use of the H_INT_ESB hcall in case of an LSI
+ * interrupt. This is necessary under KVM to re-trigger the
+ * interrupt if the level is still asserted
+ */
+ if (xive_source_irq_is_lsi(xsrc, lisn)) {
+ args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI;
+ }
+
+ if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+ args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn);
+ } else {
+ args[1] = -1;
+ }
+
+ if (xive_source_esb_has_2page(xsrc) &&
+ !(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+ args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn);
+ } else {
+ args[2] = -1;
+ }
+
+ if (xive_source_esb_has_2page(xsrc)) {
+ args[3] = xsrc->esb_shift - 1;
+ } else {
+ args[3] = xsrc->esb_shift;
+ }
+
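+ /*
+ * Worked example: with the default source setting of two 64K ESB
+ * pages per interrupt (XIVE_ESB_64K_2PAGE, see xive_source_properties
+ * in xive.c), an MSI returns the full function management page in R5,
+ * the trigger-only page in R6, and R7 = esb_shift - 1 = 16, i.e. 64K
+ * ESB pages.
+ */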
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical
+ * Interrupt Source to a target. The Logical Interrupt Source is
+ * designated with the "lisn" parameter and the target is designated
+ * with the "target" and "priority" parameters. Upon return from the
+ * hcall(), no additional interrupts will be directed to the old EQ.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-61: Reserved
+ * Bit 62: set the "eisn" in the EAS
+ * Bit 63: masks the interrupt source in the hardware interrupt
+ * control structure. An interrupt masked by this mechanism will
+ * be dropped, but its source state bits will still be
+ * set. There is no race-free way of unmasking and restoring the
+ * source. Thus this should only be used for interrupts that are
+ * also masked at the source, and only in cases where the
+ * interrupt is not meant to be used for a long time, for
+ * example because no valid target exists for it
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as returned by
+ * the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R7: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ * - R8: "eisn" is the guest EISN associated with the "lisn"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62)
+#define SPAPR_XIVE_SRC_MASK PPC_BIT(63)
+
+static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas, new_eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ target_ulong target = args[2];
+ target_ulong priority = args[3];
+ target_ulong eisn = args[4];
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /* priority 0xff is used to reset the EAS */
+ if (priority == 0xff) {
+ new_eas.w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+ goto out;
+ }
+
+ if (flags & SPAPR_XIVE_SRC_MASK) {
+ new_eas.w = eas.w | cpu_to_be64(EAS_MASKED);
+ } else {
+ new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED);
+ }
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P4;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P3;
+ }
+
+ new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk);
+ new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx);
+
+ if (flags & SPAPR_XIVE_SRC_SET_EISN) {
+ new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
+ }
+
+out:
+ xive->eat[lisn] = new_eas;
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine which
+ * target/priority pair is assigned to the specified Logical Interrupt
+ * Source.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63 Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - R4: Target to which the specified Logical Interrupt Source is
+ * assigned
+ * - R5: Priority to which the specified Logical Interrupt Source is
+ * assigned
+ * - R6: EISN for the specified Logical Interrupt Source (this will be
+ * equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG)
+ */
+static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ XiveEAS eas;
+ XiveEND *end;
+ uint8_t nvt_blk;
+ uint32_t end_idx, nvt_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /* EAS_END_BLOCK is unused on sPAPR */
+ end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
+ nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ args[0] = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+
+ if (xive_eas_is_masked(&eas)) {
+ args[1] = 0xff;
+ } else {
+ args[1] = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+ }
+
+ args[2] = xive_get_field64(EAS_END_DATA, eas.w);
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real
+ * address of the notification management page associated with the
+ * specified target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63 Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: Logical real address of notification page
+ * - R5: Power of 2 page size of the notification page
+ */
+static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveENDSource *end_xsrc = &xive->end_source;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ XiveEND *end;
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
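+ /*
+ * Each END is given a (1 << (esb_shift + 1)) byte window in the END
+ * ESB MMIO region, hence the stride below; R4 returns the start of
+ * the window of the END backing this target/priority pair, which is
+ * its notification management page.
+ */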
+ args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
+ if (xive_end_is_enqueue(end)) {
+ args[1] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+ } else {
+ args[1] = 0;
+ }
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset an EQ for
+ * a given "target" and "priority". It is also used to set the
+ * notification config associated with the EQ. An EQ size of 0 is
+ * used to reset the EQ config for a given target and priority. If
+ * resetting the EQ config, the END associated with the given "target"
+ * and "priority" will be changed to disable queueing.
+ *
+ * Upon return from the hcall(), no additional interrupts will be
+ * directed to the old EQ (if one was set). The old EQ (if one was
+ * set) should be investigated for interrupts that occurred prior to
+ * or during the hcall().
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * Bit 63: Unconditional Notify (n) per the XIVE spec
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ * - R7: "eventQueue": The logical real address of the start of the EQ
+ * - R8: "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63)
+
+static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ target_ulong qpage = args[3];
+ target_ulong qsize = args[4];
+ XiveEND end;
+ uint8_t end_blk, nvt_blk;
+ uint32_t end_idx, nvt_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
+
+ switch (qsize) {
+ case 12:
+ case 16:
+ case 21:
+ case 24:
+ end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
+ end.w3 = cpu_to_be32(qpage & 0xffffffff);
+ end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
+ end.w0 = xive_set_field32(END_W0_QSIZE, end.w0, qsize - 12);
+ break;
+ case 0:
+ /* reset queue and disable queueing */
+ spapr_xive_end_reset(&end);
+ goto out;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size %"PRIx64"\n",
+ qsize);
+ return H_P5;
+ }
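+
+ /*
+ * For example, a guest requesting a 64K event queue passes qsize = 16:
+ * the queue address is stored in W2/W3 and the QSIZE field is set to
+ * 16 - 12 = 4, i.e. 2^(4 + 10) = 16384 four-byte EQ entries.
+ */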
+
+ if (qsize) {
+ hwaddr plen = 1 << qsize;
+ void *eq;
+
+ /*
+ * Validate the guest EQ. We should also check that the queue
+ * has been zeroed by the OS.
+ */
+ eq = address_space_map(CPU(cpu)->as, qpage, &plen, true,
+ MEMTXATTRS_UNSPECIFIED);
+ if (plen != 1 << qsize) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to map EQ @0x%"
+ HWADDR_PRIx "\n", qpage);
+ return H_P4;
+ }
+ address_space_unmap(CPU(cpu)->as, eq, plen, true, plen);
+ }
+
+ /* "target" should have been validated above */
+ if (spapr_xive_target_to_nvt(target, &nvt_blk, &nvt_idx)) {
+ g_assert_not_reached();
+ }
+
+ /*
+ * Ensure the priority and target are correctly set (they will not
+ * be right after allocation)
+ */
+ end.w6 = xive_set_field32(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
+ xive_set_field32(END_W6_NVT_INDEX, 0ul, nvt_idx);
+ end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0ul, priority);
+
+ if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+ end.w0 |= cpu_to_be32(END_W0_UCOND_NOTIFY);
+ } else {
+ end.w0 &= cpu_to_be32((uint32_t)~END_W0_UCOND_NOTIFY);
+ }
+
+ /*
+ * The generation bit for the END starts at 1 and the END page
+ * offset counter starts at 0.
+ */
+ end.w1 = cpu_to_be32(END_W1_GENERATION) |
+ xive_set_field32(END_W1_PAGE_OFF, 0ul, 0ul);
+ end.w0 |= cpu_to_be32(END_W0_VALID);
+
+ /*
+ * TODO: issue syncs required to ensure all in-flight interrupts
+ * are complete on the old END
+ */
+
+out:
+ /* Update END */
+ memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_QUEUE_CONFIG hcall() is used to get an EQ for a given
+ * target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * Bit 63: Debug: Return debug data
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: "flags":
+ * Bits 0-61: Reserved
+ * Bit 62: The value of Event Queue Generation Number (g) per
+ * the XIVE spec if "Debug" = 1
+ * Bit 63: The value of Unconditional Notify (n) per the XIVE spec
+ * - R5: The logical real address of the start of the EQ
+ * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes"
+ * - R7: The value of Event Queue Offset Counter per XIVE spec
+ * if "Debug" = 1, else 0
+ *
+ */
+
+#define SPAPR_XIVE_END_DEBUG PPC_BIT(63)
+
+static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ XiveEND *end;
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_END_DEBUG) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ args[0] = 0;
+ if (xive_end_is_notify(end)) {
+ args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY;
+ }
+
+ if (xive_end_is_enqueue(end)) {
+ args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+ } else {
+ args[1] = 0;
+ args[2] = 0;
+ }
+
+ /* TODO: do we need any locking on the END ? */
+ if (flags & SPAPR_XIVE_END_DEBUG) {
+ /* Load the event queue generation number into the return flags */
+ args[0] |= (uint64_t)xive_get_field32(END_W1_GENERATION, end->w1) << 62;
+
+ /* Load R7 with the event queue offset counter */
+ args[3] = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ } else {
+ args[3] = 0;
+ }
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the
+ * reporting cache line pair for the calling thread. The reporting
+ * cache lines will contain the OS interrupt context when the OS
+ * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS
+ * interrupt. The reporting cache lines can be reset by inputting -1
+ * in "reportingLine". Issuing the CI store byte without reporting
+ * cache lines registered will result in the data not being accessible
+ * to the OS.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "reportingLine": The logical real address of the reporting cache
+ * line pair
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* TODO: H_INT_SET_OS_REPORTING_LINE */
+ return H_FUNCTION;
+}
+
+/*
+ * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical
+ * real address of the reporting cache line pair set for the input
+ * "target". If no reporting cache line pair has been set, -1 is
+ * returned.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "reportingLine": The logical real address of the reporting
+ * cache line pair
+ *
+ * Output:
+ * - R4: The logical real address of the reporting line if set, else -1
+ */
+static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* TODO: H_INT_GET_OS_REPORTING_LINE */
+ return H_FUNCTION;
+}
+
+/*
+ * The H_INT_ESB hcall() is used to issue a load or store to the ESB
+ * page for the input "lisn". This hcall is only supported for LISNs
+ * that have the ESB hcall flag set to 1 when returned from hcall()
+ * H_INT_GET_SOURCE_INFO.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * bit 63: Store: Store=1, store operation, else load operation
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "esbOffset" is the offset into the ESB page for the load or
+ * store operation
+ * - R7: "storeData" is the data to write for a store operation
+ *
+ * Output:
+ * - R4: The value of the load if load operation, else -1
+ */
+
+#define SPAPR_XIVE_ESB_STORE PPC_BIT(63)
+
+static target_ulong h_int_esb(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ target_ulong offset = args[2];
+ target_ulong data = args[3];
+ hwaddr mmio_addr;
+ XiveSource *xsrc = &xive->source;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_ESB_STORE) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ if (offset > (1ull << xsrc->esb_shift)) {
+ return H_P3;
+ }
+
+ mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+
+ if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+ (flags & SPAPR_XIVE_ESB_STORE))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+ HWADDR_PRIx "\n", mmio_addr);
+ return H_HARDWARE;
+ }
+ args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SYNC hcall() is used to issue hardware syncs that will
+ * ensure any in flight events for the input lisn are in the event
+ * queue.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_sync(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* This is not real hardware. Nothing to be done */
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_RESET hcall() is used to reset all of the partition's
+ * interrupt exploitation structures to their initial state. This
+ * means losing all previously set interrupt state set via
+ * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_reset(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ device_reset(DEVICE(xive));
+ return H_SUCCESS;
+}
+
+void spapr_xive_hcall_init(sPAPRMachineState *spapr)
+{
+ spapr_register_hypercall(H_INT_GET_SOURCE_INFO, h_int_get_source_info);
+ spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, h_int_set_source_config);
+ spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, h_int_get_source_config);
+ spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info);
+ spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, h_int_set_queue_config);
+ spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, h_int_get_queue_config);
+ spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE,
+ h_int_set_os_reporting_line);
+ spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE,
+ h_int_get_os_reporting_line);
+ spapr_register_hypercall(H_INT_ESB, h_int_esb);
+ spapr_register_hypercall(H_INT_SYNC, h_int_sync);
+ spapr_register_hypercall(H_INT_RESET, h_int_reset);
+}
+
+void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+ uint32_t phandle)
+{
+ sPAPRXive *xive = spapr->xive;
+ int node;
+ uint64_t timas[2 * 2];
+ /* Interrupt number ranges for the IPIs */
+ uint32_t lisn_ranges[] = {
+ cpu_to_be32(0),
+ cpu_to_be32(nr_servers),
+ };
+ /*
+ * EQ sizes - the event queue sizes supported by the system, which
+ * match the page sizes: 4K, 64K, 2M, 16M. We only advertise 64K
+ * for the moment.
+ */
+ uint32_t eq_sizes[] = {
+ cpu_to_be32(16), /* 64K */
+ };
+ /*
+ * The following array is in sync with the reserved priorities
+ * defined by the 'spapr_xive_priority_is_reserved' routine.
+ */
+ uint32_t plat_res_int_priorities[] = {
+ cpu_to_be32(7), /* start */
+ cpu_to_be32(0xf8), /* count */
+ };
+ gchar *nodename;
+
+ /* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */
+ timas[0] = cpu_to_be64(xive->tm_base +
+ XIVE_TM_USER_PAGE * (1ull << TM_SHIFT));
+ timas[1] = cpu_to_be64(1ull << TM_SHIFT);
+ timas[2] = cpu_to_be64(xive->tm_base +
+ XIVE_TM_OS_PAGE * (1ull << TM_SHIFT));
+ timas[3] = cpu_to_be64(1ull << TM_SHIFT);
+
+ nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+ xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+ _FDT(node = fdt_add_subnode(fdt, 0, nodename));
+ g_free(nodename);
+
+ _FDT(fdt_setprop_string(fdt, node, "device_type", "power-ivpe"));
+ _FDT(fdt_setprop(fdt, node, "reg", timas, sizeof(timas)));
+
+ _FDT(fdt_setprop_string(fdt, node, "compatible", "ibm,power-ivpe"));
+ _FDT(fdt_setprop(fdt, node, "ibm,xive-eq-sizes", eq_sizes,
+ sizeof(eq_sizes)));
+ _FDT(fdt_setprop(fdt, node, "ibm,xive-lisn-ranges", lisn_ranges,
+ sizeof(lisn_ranges)));
+
+ /* For Linux to link the LSIs to the interrupt controller. */
+ _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0));
+ _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2));
+
+ /* For SLOF */
+ _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
+ _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
+
+ /*
+ * The "ibm,plat-res-int-priorities" property defines the priority
+ * ranges reserved by the hypervisor
+ */
+ _FDT(fdt_setprop(fdt, 0, "ibm,plat-res-int-priorities",
+ plat_res_int_priorities, sizeof(plat_res_int_priorities)));
+}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e27b92b87..f67d3c80bf 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -244,7 +244,8 @@ void xics_spapr_init(sPAPRMachineState *spapr)
spapr_register_hypercall(H_IPOLL, h_ipoll);
}
-void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
+void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+ uint32_t phandle)
{
uint32_t interrupt_server_ranges_prop[] = {
0, cpu_to_be32(nr_servers),
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
new file mode 100644
index 0000000000..ea33494338
--- /dev/null
+++ b/hw/intc/xive.c
@@ -0,0 +1,1599 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/dma.h"
+#include "hw/qdev-properties.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Thread Interrupt Management context
+ */
+
+/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static uint8_t priority_to_ipb(uint8_t priority)
+{
+ return priority > XIVE_PRIORITY_MAX ?
+ 0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/*
+ * Convert an Interrupt Pending Buffer (IPB) register to a Pending
+ * Interrupt Priority Register (PIPR), which contains the priority of
+ * the most favored pending notification.
+ */
+static uint8_t ipb_to_pipr(uint8_t ipb)
+{
+ return ipb ? clz32((uint32_t)ipb << 24) : 0xff;
+}
+
+static void ipb_update(uint8_t *regs, uint8_t priority)
+{
+ regs[TM_IPB] |= priority_to_ipb(priority);
+ regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+}
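+
+/*
+ * For example, with XIVE_PRIORITY_MAX = 7, a pending priority 3
+ * notification sets bit 1 << (7 - 3) = 0x10 in the IPB and, if that is
+ * the most favored pending priority, ipb_to_pipr() returns
+ * clz32(0x10 << 24) = 3 as the PIPR.
+ */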
+
+static uint8_t exception_mask(uint8_t ring)
+{
+ switch (ring) {
+ case TM_QW1_OS:
+ return TM_QW1_NSR_EO;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *regs = &tctx->regs[ring];
+ uint8_t nsr = regs[TM_NSR];
+ uint8_t mask = exception_mask(ring);
+
+ qemu_irq_lower(tctx->output);
+
+ if (regs[TM_NSR] & mask) {
+ uint8_t cppr = regs[TM_PIPR];
+
+ regs[TM_CPPR] = cppr;
+
+ /* Reset the pending buffer bit */
+ regs[TM_IPB] &= ~priority_to_ipb(cppr);
+ regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+
+ /* Drop Exception bit */
+ regs[TM_NSR] &= ~mask;
+ }
+
+ return (nsr << 8) | regs[TM_CPPR];
+}
+
+static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *regs = &tctx->regs[ring];
+
+ if (regs[TM_PIPR] < regs[TM_CPPR]) {
+ regs[TM_NSR] |= exception_mask(ring);
+ qemu_irq_raise(tctx->output);
+ }
+}
+
+static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
+{
+ if (cppr > XIVE_PRIORITY_MAX) {
+ cppr = 0xff;
+ }
+
+ tctx->regs[ring + TM_CPPR] = cppr;
+
+ /* CPPR has changed, check if we need to raise a pending exception */
+ xive_tctx_notify(tctx, ring);
+}
+
+/*
+ * XIVE Thread Interrupt Management Area (TIMA)
+ */
+
+/*
+ * Define an access map for each page of the TIMA that we will use in
+ * the memory region ops to filter values when doing loads and stores
+ * of raw register values
+ *
+ * Register accessibility bits:
+ *
+ * 0x0 - no access
+ * 0x1 - write only
+ * 0x2 - read only
+ * 0x3 - read/write
+ */
+
+static const uint8_t xive_tm_hw_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0,
+};
+
+static const uint8_t xive_tm_hv_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_os_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 2, 3, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_user_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-1 OS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/*
+ * Overall TIMA access map for the thread interrupt management context
+ * registers
+ */
+static const uint8_t *xive_tm_views[] = {
+ [XIVE_TM_HW_PAGE] = xive_tm_hw_view,
+ [XIVE_TM_HV_PAGE] = xive_tm_hv_view,
+ [XIVE_TM_OS_PAGE] = xive_tm_os_view,
+ [XIVE_TM_USER_PAGE] = xive_tm_user_view,
+};
+
+/*
+ * Computes a register access mask for a given offset in the TIMA
+ */
+static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
+{
+ uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+ uint8_t reg_offset = offset & 0x3F;
+ uint8_t reg_mask = write ? 0x1 : 0x2;
+ uint64_t mask = 0x0;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
+ mask |= (uint64_t) 0xff << (8 * (size - i - 1));
+ }
+ }
+
+ return mask;
+}
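+
+/*
+ * For example, a 4-byte store at offset TM_QW1_OS (0x10) of the OS
+ * TIMA page is filtered by xive_tm_os_view[] down to a 0x00ff0000
+ * mask: only the CPPR byte is writable from the OS view, while the
+ * NSR and IPB bytes remain read-only.
+ */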
+
+static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ uint8_t ring_offset = offset & 0x30;
+ uint8_t reg_offset = offset & 0x3F;
+ uint64_t mask = xive_tm_mask(offset, size, true);
+ int i;
+
+ /*
+ * Only 4- or 8-byte stores are allowed and the User ring is
+ * excluded
+ */
+ if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
+ HWADDR_PRIx"\n", offset);
+ return;
+ }
+
+ /*
+ * Use the register offset for the raw values and filter out
+ * reserved values
+ */
+ for (i = 0; i < size; i++) {
+ uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
+ if (byte_mask) {
+ tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
+ byte_mask;
+ }
+ }
+}
+
+static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+ uint8_t ring_offset = offset & 0x30;
+ uint8_t reg_offset = offset & 0x3F;
+ uint64_t mask = xive_tm_mask(offset, size, false);
+ uint64_t ret;
+ int i;
+
+ /*
+ * Only 4- or 8-byte loads are allowed and the User ring is
+ * excluded
+ */
+ if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
+ HWADDR_PRIx"\n", offset);
+ return -1;
+ }
+
+ /* Use the register offset for the raw values */
+ ret = 0;
+ for (i = 0; i < size; i++) {
+ ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
+ }
+
+ /* filter out reserved values */
+ return ret & mask;
+}
+
+/*
+ * The TM context is mapped twice within each page. Stores and loads
+ * to the first mapping below 2K write and read the specified values
+ * without modification. The second mapping above 2K performs specific
+ * state changes (side effects) in addition to setting/returning the
+ * interrupt management area context of the processor thread.
+ */
+static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+ return xive_tctx_accept(tctx, TM_QW1_OS);
+}
+
+static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
+}
+
+/*
+ * Adjust the IPB to allow a CPU to process event queues of other
+ * priorities during one physical interrupt cycle.
+ */
+static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
+ xive_tctx_notify(tctx, TM_QW1_OS);
+}
+
+/*
+ * Define a mapping of "special" operations depending on the TIMA page
+ * offset and the size of the operation.
+ */
+typedef struct XiveTmOp {
+ uint8_t page_offset;
+ uint32_t op_offset;
+ unsigned size;
+ void (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+ unsigned size);
+ uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
+} XiveTmOp;
+
+static const XiveTmOp xive_tm_operations[] = {
+ /*
+ * MMIOs below 2K: raw values and special operations without side
+ * effects
+ */
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL },
+
+ /* MMIOs above 2K: special operations with side effects */
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg },
+ { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
+};
+
+static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
+{
+ uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+ uint32_t op_offset = offset & 0xFFF;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
+ const XiveTmOp *xto = &xive_tm_operations[i];
+
+ /* Accesses done from a more privileged TIMA page are allowed */
+ if (xto->page_offset >= page_offset &&
+ xto->op_offset == op_offset &&
+ xto->size == size &&
+ ((write && xto->write_handler) || (!write && xto->read_handler))) {
+ return xto;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * TIMA MMIO handlers
+ */
+static void xive_tm_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ const XiveTmOp *xto;
+
+ /*
+ * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+ */
+
+ /*
+ * First, check for special operations in the 2K region
+ */
+ if (offset & 0x800) {
+ xto = xive_tm_find_op(offset, size, true);
+ if (!xto) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
+ "@%"HWADDR_PRIx"\n", offset);
+ } else {
+ xto->write_handler(tctx, offset, value, size);
+ }
+ return;
+ }
+
+ /*
+ * Then, for special operations in the region below 2K.
+ */
+ xto = xive_tm_find_op(offset, size, true);
+ if (xto) {
+ xto->write_handler(tctx, offset, value, size);
+ return;
+ }
+
+ /*
+ * Finish with raw access to the register values
+ */
+ xive_tm_raw_write(tctx, offset, value, size);
+}
+
+static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ const XiveTmOp *xto;
+
+ /*
+ * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+ */
+
+ /*
+ * First, check for special operations in the 2K region
+ */
+ if (offset & 0x800) {
+ xto = xive_tm_find_op(offset, size, false);
+ if (!xto) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
+ "@%"HWADDR_PRIx"\n", offset);
+ return -1;
+ }
+ return xto->read_handler(tctx, offset, size);
+ }
+
+ /*
+ * Then, for special operations in the region below 2K.
+ */
+ xto = xive_tm_find_op(offset, size, false);
+ if (xto) {
+ return xto->read_handler(tctx, offset, size);
+ }
+
+ /*
+ * Finish with raw access to the register values
+ */
+ return xive_tm_raw_read(tctx, offset, size);
+}
+
+const MemoryRegionOps xive_tm_ops = {
+ .read = xive_tm_read,
+ .write = xive_tm_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+ return *((uint32_t *) &ring[TM_WORD2]);
+}
+
+static char *xive_tctx_ring_print(uint8_t *ring)
+{
+ uint32_t w2 = xive_tctx_word2(ring);
+
+ return g_strdup_printf("%02x %02x %02x %02x %02x "
+ "%02x %02x %02x %08x",
+ ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
+ ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
+ be32_to_cpu(w2));
+}
+
+static const char * const xive_tctx_ring_names[] = {
+ "USER", "OS", "POOL", "PHYS",
+};
+
+void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
+{
+ int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
+ int i;
+
+ monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
+ " W2\n", cpu_index);
+
+ for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
+ char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
+ monitor_printf(mon, "CPU[%04x]: %4s %s\n", cpu_index,
+ xive_tctx_ring_names[i], s);
+ g_free(s);
+ }
+}
+
+static void xive_tctx_reset(void *dev)
+{
+ XiveTCTX *tctx = XIVE_TCTX(dev);
+
+ memset(tctx->regs, 0, sizeof(tctx->regs));
+
+ /* Set some defaults */
+ tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
+ tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
+ tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
+
+ /*
+ * Initialize PIPR to 0xFF to avoid phantom interrupts when the
+ * CPPR is first set.
+ */
+ tctx->regs[TM_QW1_OS + TM_PIPR] =
+ ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
+}
+
+static void xive_tctx_realize(DeviceState *dev, Error **errp)
+{
+ XiveTCTX *tctx = XIVE_TCTX(dev);
+ PowerPCCPU *cpu;
+ CPUPPCState *env;
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'cpu' not found: ");
+ return;
+ }
+
+ cpu = POWERPC_CPU(obj);
+ tctx->cs = CPU(obj);
+
+ env = &cpu->env;
+ switch (PPC_INPUT(env)) {
+ case PPC_FLAGS_INPUT_POWER7:
+ tctx->output = env->irq_inputs[POWER7_INPUT_INT];
+ break;
+
+ default:
+ error_setg(errp, "XIVE interrupt controller does not support "
+ "this CPU bus model");
+ return;
+ }
+
+ qemu_register_reset(xive_tctx_reset, dev);
+}
+
+static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
+{
+ qemu_unregister_reset(xive_tctx_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_tctx = {
+ .name = TYPE_XIVE_TCTX,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_BUFFER(regs, XiveTCTX),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void xive_tctx_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE Interrupt Thread Context";
+ dc->realize = xive_tctx_realize;
+ dc->unrealize = xive_tctx_unrealize;
+ dc->vmsd = &vmstate_xive_tctx;
+}
+
+static const TypeInfo xive_tctx_info = {
+ .name = TYPE_XIVE_TCTX,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveTCTX),
+ .class_init = xive_tctx_class_init,
+};
+
+Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp)
+{
+ Error *local_err = NULL;
+ Object *obj;
+
+ obj = object_new(TYPE_XIVE_TCTX);
+ object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort);
+ object_unref(obj);
+ object_property_add_const_link(obj, "cpu", cpu, &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ goto error;
+ }
+
+ return obj;
+
+error:
+ object_unparent(obj);
+ error_propagate(errp, local_err);
+ return NULL;
+}
+
+/*
+ * XIVE ESB helpers
+ */
+
+static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ *pq &= ~0x3;
+ *pq |= value & 0x3;
+
+ return old_pq;
+}
+
+static bool xive_esb_trigger(uint8_t *pq)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ xive_esb_set(pq, XIVE_ESB_PENDING);
+ return true;
+ case XIVE_ESB_PENDING:
+ case XIVE_ESB_QUEUED:
+ xive_esb_set(pq, XIVE_ESB_QUEUED);
+ return false;
+ case XIVE_ESB_OFF:
+ xive_esb_set(pq, XIVE_ESB_OFF);
+ return false;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool xive_esb_eoi(uint8_t *pq)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ case XIVE_ESB_PENDING:
+ xive_esb_set(pq, XIVE_ESB_RESET);
+ return false;
+ case XIVE_ESB_QUEUED:
+ xive_esb_set(pq, XIVE_ESB_PENDING);
+ return true;
+ case XIVE_ESB_OFF:
+ xive_esb_set(pq, XIVE_ESB_OFF);
+ return false;
+ default:
+ g_assert_not_reached();
+ }
+}
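+
+/*
+ * Summary of the PQ transitions implemented by the two helpers above:
+ *
+ * Trigger: 00 -> 10 (notify), 10 -> 11, 11 -> 11, 01 -> 01
+ * EOI:     00 -> 00, 10 -> 00, 11 -> 10 (notify), 01 -> 01
+ *
+ * P records that an event was forwarded, Q that a further event
+ * arrived while P was set, and 01 (OFF) that the source is disabled.
+ */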
+
+/*
+ * XIVE Interrupt Source (or IVSE)
+ */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
+{
+ assert(srcno < xsrc->nr_irqs);
+
+ return xsrc->status[srcno] & 0x3;
+}
+
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
+{
+ assert(srcno < xsrc->nr_irqs);
+
+ return xive_esb_set(&xsrc->status[srcno], pq);
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+ uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
+
+ xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+ bool ret;
+
+ assert(srcno < xsrc->nr_irqs);
+
+ ret = xive_esb_trigger(&xsrc->status[srcno]);
+
+ if (xive_source_irq_is_lsi(xsrc, srcno) &&
+ xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: queued an event on LSI IRQ %d\n", srcno);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
+{
+ bool ret;
+
+ assert(srcno < xsrc->nr_irqs);
+
+ ret = xive_esb_eoi(&xsrc->status[srcno]);
+
+ /*
+ * LSI sources do not set the Q bit but they can still be
+ * asserted, in which case we should forward a new event
+ * notification
+ */
+ if (xive_source_irq_is_lsi(xsrc, srcno) &&
+ xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+ ret = xive_source_lsi_trigger(xsrc, srcno);
+ }
+
+ return ret;
+}
+
+/*
+ * Forward the source event notification to the Router
+ */
+static void xive_source_notify(XiveSource *xsrc, int srcno)
+{
+ XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
+
+ if (xnc->notify) {
+ xnc->notify(xsrc->xive, srcno);
+ }
+}
+
+/*
+ * In a two-page ESB MMIO setting, the even page is the trigger page
+ * and the odd page is for management
+ */
+static inline bool addr_is_even(hwaddr addr, uint32_t shift)
+{
+ return !((addr >> shift) & 1);
+}
+
+static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
+{
+ return xive_source_esb_has_2page(xsrc) &&
+ addr_is_even(addr, xsrc->esb_shift - 1);
+}
+
+/*
+ * ESB MMIO loads
+ * Trigger page Management/EOI page
+ *
+ * ESB MMIO setting 2 pages 1 or 2 pages
+ *
+ * 0x000 .. 0x3FF -1 EOI and return 0|1
+ * 0x400 .. 0x7FF -1 EOI and return 0|1
+ * 0x800 .. 0xBFF -1 return PQ
+ * 0xC00 .. 0xCFF -1 return PQ and atomically PQ=00
+ * 0xD00 .. 0xDFF -1 return PQ and atomically PQ=01
+ * 0xE00 .. 0xEFF -1 return PQ and atomically PQ=10
+ * 0xF00 .. 0xFFF -1 return PQ and atomically PQ=11
+ */
+static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint32_t srcno = addr >> xsrc->esb_shift;
+ uint64_t ret = -1;
+
+ /* In a two-page ESB MMIO setting, the trigger page should not be read */
+ if (xive_source_is_trigger_page(xsrc, addr)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: invalid load on IRQ %d trigger page at "
+ "0x%"HWADDR_PRIx"\n", srcno, addr);
+ return -1;
+ }
+
+ switch (offset) {
+ case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+ ret = xive_source_esb_eoi(xsrc, srcno);
+
+ /* Forward the source event notification for routing */
+ if (ret) {
+ xive_source_notify(xsrc, srcno);
+ }
+ break;
+
+ case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+ ret = xive_source_esb_get(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
+ offset);
+ }
+
+ return ret;
+}
+
+/*
+ * ESB MMIO stores
+ * Trigger page Management/EOI page
+ *
+ * ESB MMIO setting 2 pages 1 or 2 pages
+ *
+ * 0x000 .. 0x3FF Trigger Trigger
+ * 0x400 .. 0x7FF Trigger EOI
+ * 0x800 .. 0xBFF Trigger undefined
+ * 0xC00 .. 0xCFF Trigger PQ=00
+ * 0xD00 .. 0xDFF Trigger PQ=01
+ * 0xE00 .. 0xEFF Trigger PQ=10
+ * 0xF00 .. 0xFFF Trigger PQ=11
+ */
+static void xive_source_esb_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint32_t srcno = addr >> xsrc->esb_shift;
+ bool notify = false;
+
+ /* In a two-page ESB MMIO setting, the trigger page only triggers */
+ if (xive_source_is_trigger_page(xsrc, addr)) {
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ goto out;
+ }
+
+ switch (offset) {
+ case 0 ... 0x3FF:
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
+ if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: invalid Store EOI for IRQ %d\n", srcno);
+ return;
+ }
+
+ notify = xive_source_esb_eoi(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
+ offset);
+ return;
+ }
+
+out:
+ /* Forward the source event notification for routing */
+ if (notify) {
+ xive_source_notify(xsrc, srcno);
+ }
+}
+
+static const MemoryRegionOps xive_source_esb_ops = {
+ .read = xive_source_esb_read,
+ .write = xive_source_esb_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+static void xive_source_set_irq(void *opaque, int srcno, int val)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ bool notify = false;
+
+ if (xive_source_irq_is_lsi(xsrc, srcno)) {
+ if (val) {
+ notify = xive_source_lsi_trigger(xsrc, srcno);
+ } else {
+ xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+ }
+ } else {
+ if (val) {
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ }
+ }
+
+ /* Forward the source event notification for routing */
+ if (notify) {
+ xive_source_notify(xsrc, srcno);
+ }
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
+{
+ int i;
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ uint8_t pq = xive_source_esb_get(xsrc, i);
+
+ if (pq == XIVE_ESB_OFF) {
+ continue;
+ }
+
+ monitor_printf(mon, " %08x %s %c%c%c\n", i + offset,
+ xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+ pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ');
+ }
+}
+
+static void xive_source_reset(void *dev)
+{
+ XiveSource *xsrc = XIVE_SOURCE(dev);
+
+ /* Do not clear the LSI bitmap */
+
+ /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
+ memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
+}
+
+static void xive_source_realize(DeviceState *dev, Error **errp)
+{
+ XiveSource *xsrc = XIVE_SOURCE(dev);
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'xive' not found: ");
+ return;
+ }
+
+ xsrc->xive = XIVE_NOTIFIER(obj);
+
+ if (!xsrc->nr_irqs) {
+ error_setg(errp, "Number of interrupts needs to be greater than 0");
+ return;
+ }
+
+ if (xsrc->esb_shift != XIVE_ESB_4K &&
+ xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
+ xsrc->esb_shift != XIVE_ESB_64K &&
+ xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
+ error_setg(errp, "Invalid ESB shift setting");
+ return;
+ }
+
+ xsrc->status = g_malloc0(xsrc->nr_irqs);
+ xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
+
+ memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+ &xive_source_esb_ops, xsrc, "xive.esb",
+ (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+
+ xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
+ xsrc->nr_irqs);
+
+ qemu_register_reset(xive_source_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_source = {
+ .name = TYPE_XIVE_SOURCE,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
+ VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * The default XIVE interrupt source setting for the ESB MMIOs is two
+ * 64k pages without Store EOI, to be in sync with KVM.
+ */
+static Property xive_source_properties[] = {
+ DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
+ DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
+ DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_source_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE Interrupt Source";
+ dc->props = xive_source_properties;
+ dc->realize = xive_source_realize;
+ dc->vmsd = &vmstate_xive_source;
+}
+
+static const TypeInfo xive_source_info = {
+ .name = TYPE_XIVE_SOURCE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveSource),
+ .class_init = xive_source_class_init,
+};
+
+/*
+ * XiveEND helpers
+ */
+
+void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qentries = 1 << (qsize + 10);
+ int i;
+
+ /*
+ * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
+ */
+ monitor_printf(mon, " [ ");
+ qindex = (qindex - (width - 1)) & (qentries - 1);
+ for (i = 0; i < width; i++) {
+ uint64_t qaddr = qaddr_base + (qindex << 2);
+ uint32_t qdata = -1;
+
+ if (dma_memory_read(&address_space_memory, qaddr, &qdata,
+ sizeof(qdata))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
+ HWADDR_PRIx "\n", qaddr);
+ return;
+ }
+ monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
+ be32_to_cpu(qdata));
+ qindex = (qindex + 1) & (qentries - 1);
+ }
+}
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qentries = 1 << (qsize + 10);
+
+ uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+ if (!xive_end_is_valid(end)) {
+ return;
+ }
+
+ monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
+ "% 6d/%5d ^%d", end_idx,
+ xive_end_is_valid(end) ? 'v' : '-',
+ xive_end_is_enqueue(end) ? 'q' : '-',
+ xive_end_is_notify(end) ? 'n' : '-',
+ xive_end_is_backlog(end) ? 'b' : '-',
+ xive_end_is_escalate(end) ? 'e' : '-',
+ priority, nvt, qaddr_base, qindex, qentries, qgen);
+
+ xive_end_queue_pic_print_info(end, 6, mon);
+ monitor_printf(mon, "]\n");
+}
+
+static void xive_end_enqueue(XiveEND *end, uint32_t data)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+
+ uint64_t qaddr = qaddr_base + (qindex << 2);
+ uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
+ uint32_t qentries = 1 << (qsize + 10);
+
+ if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
+ HWADDR_PRIx "\n", qaddr);
+ return;
+ }
+
+ qindex = (qindex + 1) & (qentries - 1);
+ if (qindex == 0) {
+ qgen ^= 1;
+ end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
+ }
+ end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
+}
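+
+/*
+ * For example, with a QSIZE field of 4 (16384 four-byte entries, a 64K
+ * queue), the generation bit is toggled each time qindex wraps from
+ * 16383 back to 0, letting the OS tell freshly written EQ entries from
+ * stale ones of the previous pass.
+ */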
+
+/*
+ * XIVE Router (aka. Virtualization Controller or IVRE)
+ */
+
+int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+ XiveEAS *eas)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
+}
+
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+ XiveEND *end)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_end(xrtr, end_blk, end_idx, end);
+}
+
+int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+ XiveEND *end, uint8_t word_number)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
+}
+
+int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+ XiveNVT *nvt)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
+}
+
+int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+ XiveNVT *nvt, uint8_t word_number)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
+}
+
+/*
+ * The thread context register words are in big-endian format.
+ */
+static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint32_t logic_serv)
+{
+ uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
+ uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
+ uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
+ uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
+
+ /*
+ * TODO (PowerNV): ignore mode. The low order bits of the NVT
+ * identifier are ignored in the "CAM" match.
+ */
+
+ if (format == 0) {
+ if (cam_ignore == true) {
+ /*
+ * F=0 & i=1: Logical server notification (bits ignored at
+ * the end of the NVT identifier)
+ */
+ qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
+ nvt_blk, nvt_idx);
+ return -1;
+ }
+
+ /* F=0 & i=0: Specific NVT notification */
+
+ /* TODO (PowerNV) : PHYS ring */
+
+ /* HV POOL ring */
+ if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
+ cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
+ return TM_QW2_HV_POOL;
+ }
+
+ /* OS ring */
+ if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+ cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
+ return TM_QW1_OS;
+ }
+ } else {
+ /* F=1 : User level Event-Based Branch (EBB) notification */
+
+ /* USER ring */
+ if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+ (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
+ (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
+ (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
+ return TM_QW0_USER;
+ }
+ }
+ return -1;
+}
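
xive_presenter_tctx_match() above compares the target NVT's CAM line against the CAM words of the USER, OS and HV POOL rings. The sketch below only illustrates that format-0 comparison; the 19-bit index width and the single valid bit are assumptions of the example, not definitions taken from the patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CAM_VALID  (1u << 31)      /* stand-in for the ring valid bit */

    static uint32_t toy_cam_line(uint8_t blk, uint32_t idx)
    {
        return ((uint32_t)blk << 19) | (idx & 0x7ffff);
    }

    /* true when the ring is valid and its CAM word matches the target NVT */
    static bool toy_ring_matches(uint32_t ring_w2, uint8_t blk, uint32_t idx)
    {
        return (ring_w2 & CAM_VALID) &&
               (ring_w2 & ~CAM_VALID) == toy_cam_line(blk, idx);
    }

    int main(void)
    {
        uint32_t os_ring_w2 = CAM_VALID | toy_cam_line(0, 42);

        printf("NVT 0/42 matches: %d\n", toy_ring_matches(os_ring_w2, 0, 42));
        printf("NVT 0/43 matches: %d\n", toy_ring_matches(os_ring_w2, 0, 43));
        return 0;
    }
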
+
+typedef struct XiveTCTXMatch {
+ XiveTCTX *tctx;
+ uint8_t ring;
+} XiveTCTXMatch;
+
+static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
+{
+ CPUState *cs;
+
+ /*
+ * TODO (PowerNV): handle chip_id overwrite of block field for
+ * hardwired CAM compares
+ */
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ int ring;
+
+ /*
+ * HW checks that the CPU is enabled in the Physical Thread
+ * Enable Register (PTER).
+ */
+
+ /*
+ * Check the thread context CAM lines and record matches. We
+ * will handle CPU exception delivery later
+ */
+ ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
+ cam_ignore, logic_serv);
+ /*
+         * Save the context and carry on to catch duplicates, which we
+         * don't support yet.
+ */
+ if (ring != -1) {
+ if (match->tctx) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
+ "context NVT %x/%x\n", nvt_blk, nvt_idx);
+ return false;
+ }
+
+ match->ring = ring;
+ match->tctx = tctx;
+ }
+ }
+
+ if (!match->tctx) {
+ qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
+ nvt_blk, nvt_idx);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This is our simple Xive Presenter Engine model. It is merged in the
+ * Router as it does not require an extra object.
+ *
+ * It receives notification requests sent by the IVRE to find one
+ * matching NVT (or more) dispatched on the processor threads. In case
+ * of a single NVT notification, the process is abbreviated and the
+ * thread is signaled if a match is found. In case of a logical server
+ * notification (bits ignored at the end of the NVT identifier), the
+ * IVPE and IVRE select a winning thread using different filters. This
+ * involves 2 or 3 exchanges on the PowerBus that the model does not
+ * support.
+ *
+ * The parameters represent what is sent on the PowerBus
+ */
+static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv)
+{
+ XiveNVT nvt;
+ XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
+ bool found;
+
+ /* NVT cache lookup */
+ if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
+ nvt_blk, nvt_idx);
+ return;
+ }
+
+ if (!xive_nvt_is_valid(&nvt)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
+ nvt_blk, nvt_idx);
+ return;
+ }
+
+ found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
+ priority, logic_serv, &match);
+ if (found) {
+ ipb_update(&match.tctx->regs[match.ring], priority);
+ xive_tctx_notify(match.tctx, match.ring);
+ return;
+ }
+
+ /* Record the IPB in the associated NVT structure */
+ ipb_update((uint8_t *) &nvt.w4, priority);
+ xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
+
+ /*
+ * If no matching NVT is dispatched on a HW thread :
+ * - update the NVT structure if backlog is activated
+ * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
+ * activated
+ */
+}
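
When a dispatched thread is found, the notification is recorded by setting a bit in the ring's Interrupt Pending Buffer (ipb_update() above) before the thread is signaled. The standalone sketch below shows the general idea of such a priority bitmap; the "bit 7 minus priority" layout is an assumption made for the example, not a statement about the real TIMA registers.

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t ipb;

    static void toy_ipb_update(uint8_t priority)
    {
        if (priority <= 7) {
            ipb |= 1u << (7 - priority);    /* priority 0 is the most favored */
        }
    }

    static int toy_ipb_best(void)
    {
        for (int prio = 0; prio <= 7; prio++) {
            if (ipb & (1u << (7 - prio))) {
                return prio;
            }
        }
        return -1;                          /* nothing pending */
    }

    int main(void)
    {
        toy_ipb_update(6);
        toy_ipb_update(3);
        printf("most favored pending priority: %d\n", toy_ipb_best());
        return 0;
    }
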
+
+/*
+ * An END trigger can come from an event trigger (IPI or HW) or from
+ * another chip. We don't model the PowerBus but the END trigger
+ * message carries the same parameters as the function below.
+ */
+static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
+ uint32_t end_idx, uint32_t end_data)
+{
+ XiveEND end;
+ uint8_t priority;
+ uint8_t format;
+
+ /* END cache lookup */
+ if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+ end_idx);
+ return;
+ }
+
+ if (!xive_end_is_valid(&end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+ end_blk, end_idx);
+ return;
+ }
+
+ if (xive_end_is_enqueue(&end)) {
+ xive_end_enqueue(&end, end_data);
+ /* Enqueuing event data modifies the EQ toggle and index */
+ xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ /*
+ * The W7 format depends on the F bit in W6. It defines the type
+ * of the notification :
+ *
+ * F=0 : single or multiple NVT notification
+ * F=1 : User level Event-Based Branch (EBB) notification, no
+ * priority
+ */
+ format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
+ priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
+
+ /* The END is masked */
+ if (format == 0 && priority == 0xff) {
+ return;
+ }
+
+ /*
+ * Check the END ESn (Event State Buffer for notification) for
+     * even further coalescing in the Router
+ */
+ if (!xive_end_is_notify(&end)) {
+ uint8_t pq = xive_get_field32(END_W1_ESn, end.w1);
+ bool notify = xive_esb_trigger(&pq);
+
+ if (pq != xive_get_field32(END_W1_ESn, end.w1)) {
+ end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq);
+ xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ /* ESn[Q]=1 : end of notification */
+ if (!notify) {
+ return;
+ }
+ }
+
+ /*
+ * Follows IVPE notification
+ */
+ xive_presenter_notify(xrtr, format,
+ xive_get_field32(END_W6_NVT_BLOCK, end.w6),
+ xive_get_field32(END_W6_NVT_INDEX, end.w6),
+ xive_get_field32(END_W7_F0_IGNORE, end.w7),
+ priority,
+ xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
+
+ /* TODO: Auto EOI. */
+}
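
The ESn check above relies on the usual 2-bit ESB state machine: the first trigger on a reset entry notifies, later triggers are coalesced, and an "off" entry drops the event. The sketch below models that machine in isolation; the 00/01/10/11 encoding of Reset/Off/Pending/Queued is the conventional XIVE encoding and is an assumption of the example rather than a quote of xive_esb_trigger().

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { ESB_RESET = 0x0, ESB_OFF = 0x1, ESB_PENDING = 0x2, ESB_QUEUED = 0x3 };

    /* Returns true when the event should be forwarded for notification */
    static bool toy_esb_trigger(uint8_t *pq)
    {
        switch (*pq) {
        case ESB_RESET:
            *pq = ESB_PENDING;
            return true;                   /* first event: notify */
        case ESB_PENDING:
        case ESB_QUEUED:
            *pq = ESB_QUEUED;
            return false;                  /* coalesced while still pending */
        default:                           /* ESB_OFF: masked, drop */
            return false;
        }
    }

    int main(void)
    {
        uint8_t pq = ESB_RESET;

        for (int i = 0; i < 3; i++) {
            printf("trigger %d -> notify=%d pq=%x\n",
                   i, toy_esb_trigger(&pq), pq);
        }
        return 0;
    }
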
+
+static void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
+{
+ XiveRouter *xrtr = XIVE_ROUTER(xn);
+ uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn);
+ uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn);
+ XiveEAS eas;
+
+ /* EAS cache lookup */
+ if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
+ return;
+ }
+
+ /*
+ * The IVRE checks the State Bit Cache at this point. We skip the
+ * SBC lookup because the state bits of the sources are modeled
+ * internally in QEMU.
+ */
+
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
+ return;
+ }
+
+ if (xive_eas_is_masked(&eas)) {
+ /* Notification completed */
+ return;
+ }
+
+ /*
+ * The event trigger becomes an END trigger
+ */
+ xive_router_end_notify(xrtr,
+ xive_get_field64(EAS_END_BLOCK, eas.w),
+ xive_get_field64(EAS_END_INDEX, eas.w),
+ xive_get_field64(EAS_END_DATA, eas.w));
+}
+
+static void xive_router_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+
+ dc->desc = "XIVE Router Engine";
+ xnc->notify = xive_router_notify;
+}
+
+static const TypeInfo xive_router_info = {
+ .name = TYPE_XIVE_ROUTER,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .abstract = true,
+ .class_size = sizeof(XiveRouterClass),
+ .class_init = xive_router_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_NOTIFIER },
+ { }
+ }
+};
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
+{
+ if (!xive_eas_is_valid(eas)) {
+ return;
+ }
+
+ monitor_printf(mon, " %08x %s end:%02x/%04x data:%08x\n",
+ lisn, xive_eas_is_masked(eas) ? "M" : " ",
+ (uint8_t) xive_get_field64(EAS_END_BLOCK, eas->w),
+ (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
+ (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
+}
+
+/*
+ * END ESB MMIO loads
+ */
+static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
+{
+ XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint8_t end_blk;
+ uint32_t end_idx;
+ XiveEND end;
+ uint32_t end_esmask;
+ uint8_t pq;
+ uint64_t ret = -1;
+
+ end_blk = xsrc->block_id;
+ end_idx = addr >> (xsrc->esb_shift + 1);
+
+ if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+ end_idx);
+ return -1;
+ }
+
+ if (!xive_end_is_valid(&end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+ end_blk, end_idx);
+ return -1;
+ }
+
+ end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
+ pq = xive_get_field32(end_esmask, end.w1);
+
+ switch (offset) {
+ case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+ ret = xive_esb_eoi(&pq);
+
+ /* Forward the source event notification for routing ?? */
+ break;
+
+ case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+ ret = pq;
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
+ offset);
+ return -1;
+ }
+
+ if (pq != xive_get_field32(end_esmask, end.w1)) {
+ end.w1 = xive_set_field32(end_esmask, end.w1, pq);
+ xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ return ret;
+}
+
+/*
+ * END ESB MMIO stores are invalid
+ */
+static void xive_end_source_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
+ HWADDR_PRIx"\n", addr);
+}
+
+static const MemoryRegionOps xive_end_source_ops = {
+ .read = xive_end_source_read,
+ .write = xive_end_source_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+static void xive_end_source_realize(DeviceState *dev, Error **errp)
+{
+ XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'xive' not found: ");
+ return;
+ }
+
+ xsrc->xrtr = XIVE_ROUTER(obj);
+
+ if (!xsrc->nr_ends) {
+ error_setg(errp, "Number of interrupt needs to be greater than 0");
+ return;
+ }
+
+ if (xsrc->esb_shift != XIVE_ESB_4K &&
+ xsrc->esb_shift != XIVE_ESB_64K) {
+ error_setg(errp, "Invalid ESB shift setting");
+ return;
+ }
+
+ /*
+     * Each END is assigned an even/odd pair of MMIO pages: the even page
+ * manages the ESn field while the odd page manages the ESe field.
+ */
+ memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+ &xive_end_source_ops, xsrc, "xive.end",
+ (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
+}
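
Because every END owns an even/odd pair of ESB pages, an offset into the region decomposes into an END index and a page parity selecting ESn or ESe, which is exactly what xive_end_source_read() does above. The block below is a sketch of that decoding only, with a 4K page shift assumed for the example.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ESB_SHIFT 12                   /* assume 4K ESB pages */

    static void toy_decode(uint64_t addr)
    {
        uint32_t end_idx = addr >> (ESB_SHIFT + 1);   /* two pages per END */
        bool even = !((addr >> ESB_SHIFT) & 1);

        printf("addr 0x%llx -> END %u, %s page (%s)\n",
               (unsigned long long)addr, (unsigned)end_idx,
               even ? "even" : "odd", even ? "ESn" : "ESe");
    }

    int main(void)
    {
        toy_decode(0x0000);                /* END 0, ESn */
        toy_decode(0x1000);                /* END 0, ESe */
        toy_decode(0x2000);                /* END 1, ESn */
        return 0;
    }
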
+
+static Property xive_end_source_properties[] = {
+ DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0),
+ DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
+ DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_end_source_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE END Source";
+ dc->props = xive_end_source_properties;
+ dc->realize = xive_end_source_realize;
+}
+
+static const TypeInfo xive_end_source_info = {
+ .name = TYPE_XIVE_END_SOURCE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveENDSource),
+ .class_init = xive_end_source_class_init,
+};
+
+/*
+ * XIVE Fabric
+ */
+static const TypeInfo xive_fabric_info = {
+ .name = TYPE_XIVE_NOTIFIER,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(XiveNotifierClass),
+};
+
+static void xive_register_types(void)
+{
+ type_register_static(&xive_source_info);
+ type_register_static(&xive_fabric_info);
+ type_register_static(&xive_router_info);
+ type_register_static(&xive_end_source_info);
+ type_register_static(&xive_tctx_info);
+}
+
+type_init(xive_register_types)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 3648630386..54746a4030 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -18,7 +18,6 @@
#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
-#include "net/net.h"
#include "net/tap.h"
#include "net/checksum.h"
#include "sysemu/sysemu.h"
@@ -29,6 +28,7 @@
#include "migration/register.h"
#include "vmxnet3.h"
+#include "vmxnet3_defs.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
#include "net_tx_pkt.h"
@@ -131,23 +131,11 @@ typedef struct VMXNET3Class {
DeviceRealize parent_dc_realize;
} VMXNET3Class;
-#define TYPE_VMXNET3 "vmxnet3"
-#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
-
#define VMXNET3_DEVICE_CLASS(klass) \
OBJECT_CLASS_CHECK(VMXNET3Class, (klass), TYPE_VMXNET3)
#define VMXNET3_DEVICE_GET_CLASS(obj) \
OBJECT_GET_CLASS(VMXNET3Class, (obj), TYPE_VMXNET3)
-/* Cyclic ring abstraction */
-typedef struct {
- hwaddr pa;
- uint32_t size;
- uint32_t cell_size;
- uint32_t next;
- uint8_t gen;
-} Vmxnet3Ring;
-
static inline void vmxnet3_ring_init(PCIDevice *d,
Vmxnet3Ring *ring,
hwaddr pa,
@@ -245,108 +233,6 @@ vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
descr->rsvd, descr->dtype, descr->ext1, descr->btype);
}
-/* Device state and helper functions */
-#define VMXNET3_RX_RINGS_PER_QUEUE (2)
-
-typedef struct {
- Vmxnet3Ring tx_ring;
- Vmxnet3Ring comp_ring;
-
- uint8_t intr_idx;
- hwaddr tx_stats_pa;
- struct UPT1_TxStats txq_stats;
-} Vmxnet3TxqDescr;
-
-typedef struct {
- Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
- Vmxnet3Ring comp_ring;
- uint8_t intr_idx;
- hwaddr rx_stats_pa;
- struct UPT1_RxStats rxq_stats;
-} Vmxnet3RxqDescr;
-
-typedef struct {
- bool is_masked;
- bool is_pending;
- bool is_asserted;
-} Vmxnet3IntState;
-
-typedef struct {
- PCIDevice parent_obj;
- NICState *nic;
- NICConf conf;
- MemoryRegion bar0;
- MemoryRegion bar1;
- MemoryRegion msix_bar;
-
- Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
- Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
-
- /* Whether MSI-X support was installed successfully */
- bool msix_used;
- hwaddr drv_shmem;
- hwaddr temp_shared_guest_driver_memory;
-
- uint8_t txq_num;
-
- /* This boolean tells whether RX packet being indicated has to */
- /* be split into head and body chunks from different RX rings */
- bool rx_packets_compound;
-
- bool rx_vlan_stripping;
- bool lro_supported;
-
- uint8_t rxq_num;
-
- /* Network MTU */
- uint32_t mtu;
-
- /* Maximum number of fragments for indicated TX packets */
- uint32_t max_tx_frags;
-
- /* Maximum number of fragments for indicated RX packets */
- uint16_t max_rx_frags;
-
- /* Index for events interrupt */
- uint8_t event_int_idx;
-
- /* Whether automatic interrupts masking enabled */
- bool auto_int_masking;
-
- bool peer_has_vhdr;
-
- /* TX packets to QEMU interface */
- struct NetTxPkt *tx_pkt;
- uint32_t offload_mode;
- uint32_t cso_or_gso_size;
- uint16_t tci;
- bool needs_vlan;
-
- struct NetRxPkt *rx_pkt;
-
- bool tx_sop;
- bool skip_current_tx_pkt;
-
- uint32_t device_active;
- uint32_t last_command;
-
- uint32_t link_status_and_speed;
-
- Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
-
- uint32_t temp_mac; /* To store the low part first */
-
- MACAddr perm_mac;
- uint32_t vlan_table[VMXNET3_VFT_SIZE];
- uint32_t rx_mode;
- MACAddr *mcast_list;
- uint32_t mcast_list_len;
- uint32_t mcast_list_buff_size; /* needed for live migration. */
-
- /* Compatibility flags for migration */
- uint32_t compat_flags;
-} VMXNET3State;
-
/* Interrupt management */
/*
diff --git a/hw/net/vmxnet3_defs.h b/hw/net/vmxnet3_defs.h
new file mode 100644
index 0000000000..6c19d29b12
--- /dev/null
+++ b/hw/net/vmxnet3_defs.h
@@ -0,0 +1,133 @@
+/*
+ * QEMU VMWARE VMXNET3 paravirtual NIC
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "net/net.h"
+#include "hw/net/vmxnet3.h"
+
+#define TYPE_VMXNET3 "vmxnet3"
+#define VMXNET3(obj) OBJECT_CHECK(VMXNET3State, (obj), TYPE_VMXNET3)
+
+/* Device state and helper functions */
+#define VMXNET3_RX_RINGS_PER_QUEUE (2)
+
+/* Cyclic ring abstraction */
+typedef struct {
+ hwaddr pa;
+ uint32_t size;
+ uint32_t cell_size;
+ uint32_t next;
+ uint8_t gen;
+} Vmxnet3Ring;
+
+typedef struct {
+ Vmxnet3Ring tx_ring;
+ Vmxnet3Ring comp_ring;
+
+ uint8_t intr_idx;
+ hwaddr tx_stats_pa;
+ struct UPT1_TxStats txq_stats;
+} Vmxnet3TxqDescr;
+
+typedef struct {
+ Vmxnet3Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
+ Vmxnet3Ring comp_ring;
+ uint8_t intr_idx;
+ hwaddr rx_stats_pa;
+ struct UPT1_RxStats rxq_stats;
+} Vmxnet3RxqDescr;
+
+typedef struct {
+ bool is_masked;
+ bool is_pending;
+ bool is_asserted;
+} Vmxnet3IntState;
+
+typedef struct {
+ PCIDevice parent_obj;
+ NICState *nic;
+ NICConf conf;
+ MemoryRegion bar0;
+ MemoryRegion bar1;
+ MemoryRegion msix_bar;
+
+ Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
+ Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
+
+ /* Whether MSI-X support was installed successfully */
+ bool msix_used;
+ hwaddr drv_shmem;
+ hwaddr temp_shared_guest_driver_memory;
+
+ uint8_t txq_num;
+
+ /* This boolean tells whether RX packet being indicated has to */
+ /* be split into head and body chunks from different RX rings */
+ bool rx_packets_compound;
+
+ bool rx_vlan_stripping;
+ bool lro_supported;
+
+ uint8_t rxq_num;
+
+ /* Network MTU */
+ uint32_t mtu;
+
+ /* Maximum number of fragments for indicated TX packets */
+ uint32_t max_tx_frags;
+
+ /* Maximum number of fragments for indicated RX packets */
+ uint16_t max_rx_frags;
+
+ /* Index for events interrupt */
+ uint8_t event_int_idx;
+
+ /* Whether automatic interrupts masking enabled */
+ bool auto_int_masking;
+
+ bool peer_has_vhdr;
+
+ /* TX packets to QEMU interface */
+ struct NetTxPkt *tx_pkt;
+ uint32_t offload_mode;
+ uint32_t cso_or_gso_size;
+ uint16_t tci;
+ bool needs_vlan;
+
+ struct NetRxPkt *rx_pkt;
+
+ bool tx_sop;
+ bool skip_current_tx_pkt;
+
+ uint32_t device_active;
+ uint32_t last_command;
+
+ uint32_t link_status_and_speed;
+
+ Vmxnet3IntState interrupt_states[VMXNET3_MAX_INTRS];
+
+ uint32_t temp_mac; /* To store the low part first */
+
+ MACAddr perm_mac;
+ uint32_t vlan_table[VMXNET3_VFT_SIZE];
+ uint32_t rx_mode;
+ MACAddr *mcast_list;
+ uint32_t mcast_list_len;
+ uint32_t mcast_list_buff_size; /* needed for live migration. */
+
+ /* Compatibility flags for migration */
+ uint32_t compat_flags;
+} VMXNET3State;
diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
index 299de429ec..9766edb445 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -124,6 +124,10 @@ static Property gen_rp_props[] = {
res_reserve.mem_pref_32, -1),
DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort,
res_reserve.mem_pref_64, -1),
+ DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot,
+ speed, PCIE_LINK_SPEED_16),
+ DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot,
+ width, PCIE_LINK_WIDTH_32),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 97a8e8b6a4..ff6b8323da 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -206,31 +206,39 @@ static const VMStateDescription pci_bridge_dev_vmstate = {
}
};
-static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
{
PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
if (!shpc_present(pci_hotplug_dev)) {
error_setg(errp, "standard hotplug controller has been disabled for "
- "this %s", TYPE_PCI_BRIDGE_DEV);
+ "this %s", object_get_typename(OBJECT(hotplug_dev)));
return;
}
- shpc_device_hotplug_cb(hotplug_dev, dev, errp);
+ shpc_device_plug_cb(hotplug_dev, dev, errp);
}
-static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev,
- Error **errp)
+void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
+
+ g_assert(shpc_present(pci_hotplug_dev));
+ shpc_device_unplug_cb(hotplug_dev, dev, errp);
+}
+
+void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
if (!shpc_present(pci_hotplug_dev)) {
error_setg(errp, "standard hotplug controller has been disabled for "
- "this %s", TYPE_PCI_BRIDGE_DEV);
+ "this %s", object_get_typename(OBJECT(hotplug_dev)));
return;
}
- shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp);
+ shpc_device_unplug_request_cb(hotplug_dev, dev, errp);
}
static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
@@ -251,8 +259,9 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
dc->props = pci_bridge_dev_properties;
dc->vmsd = &pci_bridge_dev_vmstate;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
- hc->plug = pci_bridge_dev_hotplug_cb;
- hc->unplug_request = pci_bridge_dev_hot_unplug_request_cb;
+ hc->plug = pci_bridge_dev_plug_cb;
+ hc->unplug = pci_bridge_dev_unplug_cb;
+ hc->unplug_request = pci_bridge_dev_unplug_request_cb;
}
static const TypeInfo pci_bridge_dev_info = {
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 04cf5a6a92..d491b40d04 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -137,33 +137,6 @@ static const VMStateDescription pcie_pci_bridge_dev_vmstate = {
}
};
-static void pcie_pci_bridge_hotplug_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
-{
- PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
-
- if (!shpc_present(pci_hotplug_dev)) {
- error_setg(errp, "standard hotplug controller has been disabled for "
- "this %s", TYPE_PCIE_PCI_BRIDGE_DEV);
- return;
- }
- shpc_device_hotplug_cb(hotplug_dev, dev, errp);
-}
-
-static void pcie_pci_bridge_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev,
- Error **errp)
-{
- PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
-
- if (!shpc_present(pci_hotplug_dev)) {
- error_setg(errp, "standard hotplug controller has been disabled for "
- "this %s", TYPE_PCIE_PCI_BRIDGE_DEV);
- return;
- }
- shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp);
-}
-
static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
@@ -180,8 +153,9 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data)
dc->props = pcie_pci_bridge_dev_properties;
dc->reset = &pcie_pci_bridge_reset;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
- hc->plug = pcie_pci_bridge_hotplug_cb;
- hc->unplug_request = pcie_pci_bridge_hot_unplug_request_cb;
+ hc->plug = pci_bridge_dev_plug_cb;
+ hc->unplug = pci_bridge_dev_unplug_cb;
+ hc->unplug_request = pci_bridge_dev_unplug_request_cb;
}
static const TypeInfo pcie_pci_bridge_info = {
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index 45f9e8cd4a..34ad76743c 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -140,6 +140,19 @@ static Property rp_props[] = {
DEFINE_PROP_END_OF_LIST()
};
+static void rp_instance_post_init(Object *obj)
+{
+ PCIESlot *s = PCIE_SLOT(obj);
+
+ if (!s->speed) {
+ s->speed = QEMU_PCI_EXP_LNK_2_5GT;
+ }
+
+ if (!s->width) {
+ s->width = QEMU_PCI_EXP_LNK_X1;
+ }
+}
+
static void rp_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -157,6 +170,7 @@ static void rp_class_init(ObjectClass *klass, void *data)
static const TypeInfo rp_info = {
.name = TYPE_PCIE_ROOT_PORT,
.parent = TYPE_PCIE_SLOT,
+ .instance_post_init = rp_instance_post_init,
.class_init = rp_class_init,
.abstract = true,
.class_size = sizeof(PCIERootPortClass),
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index efb5ce196f..d831fa0a36 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1353,6 +1353,10 @@ uint32_t pci_default_read_config(PCIDevice *d,
{
uint32_t val = 0;
+ if (pci_is_express_downstream_port(d) &&
+ ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) {
+ pcie_sync_bridge_lnk(d);
+ }
memcpy(&val, d->config + address, len);
return le32_to_cpu(val);
}
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index ee9dff2d3a..b9143ac88b 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -241,9 +241,9 @@ void pci_bridge_update_mappings(PCIBridge *br)
* while another accesses an unaffected region. */
memory_region_transaction_begin();
pci_bridge_region_del(br, br->windows);
+ pci_bridge_region_cleanup(br, w);
br->windows = pci_bridge_region_init(br);
memory_region_transaction_commit();
- pci_bridge_region_cleanup(br, w);
}
/* default write_config function for PCI-to-PCI bridge */
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 5eaa935cb5..5f5345dbac 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -20,6 +20,7 @@
#include "qemu/osdep.h"
#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pci_bus.h"
#include "trace.h"
@@ -50,9 +51,29 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
return pci_find_device(bus, bus_num, devfn);
}
+static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
+{
+ if (*limit > PCI_CONFIG_SPACE_SIZE) {
+ if (!pci_bus_is_express(bus)) {
+ *limit = PCI_CONFIG_SPACE_SIZE;
+ return;
+ }
+
+ if (!pci_bus_is_root(bus)) {
+ PCIDevice *bridge = pci_bridge_get_device(bus);
+ pci_adjust_config_limit(pci_get_bus(bridge), limit);
+ }
+ }
+}
+
void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val, uint32_t len)
{
+ pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
+ if (limit <= addr) {
+ return;
+ }
+
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
@@ -71,6 +92,11 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
{
uint32_t ret;
+ pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
+ if (limit <= addr) {
+ return ~0x0;
+ }
+
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
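
The new pci_adjust_config_limit() above caps accesses at 256 bytes whenever a conventional PCI bus sits anywhere on the path to the root, while pure PCIe paths keep the 4K extended space. The sketch below only illustrates that clamping rule for the flat, non-recursive case; it is not the patch's code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PCI_CONFIG_SPACE_SIZE   256
    #define PCIE_CONFIG_SPACE_SIZE  4096

    static uint32_t toy_adjust_limit(bool bus_is_express, uint32_t limit)
    {
        /* conventional buses never expose more than 256 bytes of config space */
        if (limit > PCI_CONFIG_SPACE_SIZE && !bus_is_express) {
            return PCI_CONFIG_SPACE_SIZE;
        }
        return limit;
    }

    int main(void)
    {
        printf("express bus:      limit %u\n",
               toy_adjust_limit(true, PCIE_CONFIG_SPACE_SIZE));
        printf("conventional bus: limit %u\n",
               toy_adjust_limit(false, PCIE_CONFIG_SPACE_SIZE));
        return 0;
    }
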
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6c91bd44a0..2d3d8a047b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -27,6 +27,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pcie_regs.h"
+#include "hw/pci/pcie_port.h"
#include "qemu/range.h"
//#define DEBUG_PCIE
@@ -68,11 +69,12 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
pci_set_long(exp_cap + PCI_EXP_LNKCAP,
(port << PCI_EXP_LNKCAP_PN_SHIFT) |
PCI_EXP_LNKCAP_ASPMS_0S |
- PCI_EXP_LNK_MLW_1 |
- PCI_EXP_LNK_LS_25);
+ QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT));
pci_set_word(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+ QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT));
if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
@@ -86,6 +88,76 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
pci_set_word(cmask + PCI_EXP_LNKSTA, 0);
}
+static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+{
+ PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+ /* Skip anything that isn't a PCIESlot */
+ if (!s) {
+ return;
+ }
+
+ /* Clear and fill LNKCAP from what was configured above */
+ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ QEMU_PCI_EXP_LNKCAP_MLW(s->width) |
+ QEMU_PCI_EXP_LNKCAP_MLS(s->speed));
+
+ /*
+ * Link bandwidth notification is required for all root ports and
+ * downstream ports supporting links wider than x1 or multiple link
+ * speeds.
+ */
+ if (s->width > QEMU_PCI_EXP_LNK_X1 ||
+ s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_LBNC);
+ }
+
+ if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+ /*
+ * Hot-plug capable downstream ports and downstream ports supporting
+ * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
+ * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
+ * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also
+ * technically implement this, but it's not done here for compatibility.
+ */
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_DLLLARC);
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+
+ /*
+ * Target Link Speed defaults to the highest link speed supported by
+ * the component. 2.5GT/s devices are permitted to hardwire to zero.
+ */
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS);
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2,
+ QEMU_PCI_EXP_LNKCAP_MLS(s->speed) &
+ PCI_EXP_LNKCTL2_TLS);
+ }
+
+ /*
+ * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is
+ * actually a reference to the highest bit supported in this register.
+ * We assume the device supports all link speeds.
+ */
+ if (s->speed > QEMU_PCI_EXP_LNK_5GT) {
+ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U);
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+ PCI_EXP_LNKCAP2_SLS_2_5GB |
+ PCI_EXP_LNKCAP2_SLS_5_0GB |
+ PCI_EXP_LNKCAP2_SLS_8_0GB);
+ if (s->speed > QEMU_PCI_EXP_LNK_8GT) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+ PCI_EXP_LNKCAP2_SLS_16_0GB);
+ }
+ }
+}
+
int pcie_cap_init(PCIDevice *dev, uint8_t offset,
uint8_t type, uint8_t port,
Error **errp)
@@ -107,6 +179,9 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
/* Filling values common with v1 */
pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2);
+ /* Fill link speed and width options */
+ pcie_cap_fill_slot_lnk(dev);
+
/* Filling v2 specific values */
pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
@@ -315,9 +390,8 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event)
hotplug_event_notify(dev);
}
-static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev,
- DeviceState *dev,
- uint8_t **exp_cap, Error **errp)
+static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev,
+ uint8_t **exp_cap, Error **errp)
{
*exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA);
@@ -331,13 +405,13 @@ static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev,
}
}
-void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
- Error **errp)
+void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
{
uint8_t *exp_cap;
PCIDevice *pci_dev = PCI_DEVICE(dev);
- pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
+ pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
/* Don't send event when device is enabled during qemu machine creation:
* it is present on boot, no hotplug event is necessary. We do send an
@@ -345,6 +419,10 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
if (!dev->hotplugged) {
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDS);
+ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
return;
}
@@ -355,24 +433,36 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
if (pci_get_function_0(pci_dev)) {
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDS);
+ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
pcie_cap_slot_event(PCI_DEVICE(hotplug_dev),
PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
}
}
-static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
{
object_unparent(OBJECT(dev));
}
-void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+ HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
+
+ hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
+}
+
+void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
uint8_t *exp_cap;
PCIDevice *pci_dev = PCI_DEVICE(dev);
PCIBus *bus = pci_get_bus(pci_dev);
- pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
+ pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp);
/* In case user cancel the operation of multi-function hot-add,
* remove the function that is unexposed to guest individually,
@@ -531,6 +621,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDS);
+ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDC);
}
@@ -728,6 +822,45 @@ void pcie_add_capability(PCIDevice *dev,
memset(dev->cmask + offset, 0xFF, size);
}
+/*
+ * Sync the PCIe Link Status negotiated speed and width of a bridge with the
+ * downstream device. If downstream device is not present, re-write with the
+ * Link Capability fields. Limit width and speed to bridge capabilities for
+ * compatibility. Use config_read to access the downstream device since it
+ * could be an assigned device with volatile link information.
+ */
+void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
+{
+ PCIBridge *br = PCI_BRIDGE(bridge_dev);
+ PCIBus *bus = pci_bridge_get_sec_bus(br);
+ PCIDevice *target = bus->devices[0];
+ uint8_t *exp_cap = bridge_dev->config + bridge_dev->exp.exp_cap;
+ uint16_t lnksta, lnkcap = pci_get_word(exp_cap + PCI_EXP_LNKCAP);
+
+ if (!target || !target->exp.exp_cap) {
+ lnksta = lnkcap;
+ } else {
+ lnksta = target->config_read(target,
+ target->exp.exp_cap + PCI_EXP_LNKSTA,
+ sizeof(lnksta));
+
+ if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) {
+ lnksta &= ~PCI_EXP_LNKSTA_NLW;
+ lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW;
+ }
+
+ if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) {
+ lnksta &= ~PCI_EXP_LNKSTA_CLS;
+ lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS;
+ }
+ }
+
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta &
+ (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW));
+}
+
/**************************************************************************
* pci express extended capability helper functions
*/
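
pcie_sync_bridge_lnk() above copies the downstream device's negotiated width and speed into the bridge's LNKSTA, but never lets them exceed what the bridge's own LNKCAP advertises. The following sketch only illustrates that clamping rule; the field masks follow the standard PCIe register layout and the example values are invented.

    #include <stdint.h>
    #include <stdio.h>

    #define LNK_CLS 0x000f                 /* current / supported link speed */
    #define LNK_NLW 0x03f0                 /* negotiated / maximum link width */

    static uint16_t toy_clamp_lnksta(uint16_t lnksta, uint16_t lnkcap)
    {
        if ((lnksta & LNK_NLW) > (lnkcap & LNK_NLW)) {
            lnksta = (lnksta & ~LNK_NLW) | (lnkcap & LNK_NLW);
        }
        if ((lnksta & LNK_CLS) > (lnkcap & LNK_CLS)) {
            lnksta = (lnksta & ~LNK_CLS) | (lnkcap & LNK_CLS);
        }
        return lnksta;
    }

    int main(void)
    {
        uint16_t bridge_cap = (1 << 4) | 1;    /* x1 link, 2.5GT/s bridge */
        uint16_t dev_sta    = (16 << 4) | 4;   /* x16 link, 16GT/s device */

        printf("synced LNKSTA: 0x%04x\n",
               toy_clamp_lnksta(dev_sta, bridge_cap));
        return 0;
    }
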
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index 6432b9ac1f..bc07abc31b 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -154,8 +154,9 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
dc->props = pcie_slot_props;
- hc->plug = pcie_cap_slot_hotplug_cb;
- hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb;
+ hc->plug = pcie_cap_slot_plug_cb;
+ hc->unplug = pcie_cap_slot_unplug_cb;
+ hc->unplug_request = pcie_cap_slot_unplug_request_cb;
}
static const TypeInfo pcie_slot_type_info = {
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 96a43d2f70..45053b39b9 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -238,6 +238,7 @@ static void shpc_invalid_command(SHPCDevice *shpc)
static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
{
+ HotplugHandler *hotplug_ctrl;
int devfn;
int pci_slot = SHPC_IDX_TO_PCI(slot);
for (devfn = PCI_DEVFN(pci_slot, 0);
@@ -245,7 +246,9 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
++devfn) {
PCIDevice *affected_dev = shpc->sec_bus->devices[devfn];
if (affected_dev) {
- object_unparent(OBJECT(affected_dev));
+ hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(affected_dev));
+ hotplug_handler_unplug(hotplug_ctrl, DEVICE(affected_dev),
+ &error_abort);
}
}
}
@@ -482,8 +485,8 @@ static const MemoryRegionOps shpc_mmio_ops = {
.max_access_size = 4,
},
};
-static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot,
- SHPCDevice *shpc, Error **errp)
+static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot,
+ SHPCDevice *shpc, Error **errp)
{
int pci_slot = PCI_SLOT(affected_dev->devfn);
*slot = SHPC_PCI_TO_IDX(pci_slot);
@@ -497,7 +500,7 @@ static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot,
}
}
-void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
Error *local_err = NULL;
@@ -505,7 +508,7 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
SHPCDevice *shpc = pci_hotplug_dev->shpc;
int slot;
- shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
+ shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -540,8 +543,14 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
shpc_interrupt_update(pci_hotplug_dev);
}
-void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ object_unparent(OBJECT(dev));
+}
+
+void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
{
Error *local_err = NULL;
PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
@@ -550,7 +559,7 @@ void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev,
uint8_t led;
int slot;
- shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
+ shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index e6747fce28..b20fea0dfc 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -685,7 +685,7 @@ static void ppce500_cpu_reset(void *opaque)
}
static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
- qemu_irq **irqs)
+ IrqLines *irqs)
{
DeviceState *dev;
SysBusDevice *s;
@@ -705,7 +705,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
k = 0;
for (i = 0; i < smp_cpus; i++) {
for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
- sysbus_connect_irq(s, k++, irqs[i][j]);
+ sysbus_connect_irq(s, k++, irqs[i].irq[j]);
}
}
@@ -713,7 +713,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
}
static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
- qemu_irq **irqs, Error **errp)
+ IrqLines *irqs, Error **errp)
{
Error *err = NULL;
DeviceState *dev;
@@ -742,7 +742,7 @@ static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms,
MemoryRegion *ccsr,
- qemu_irq **irqs)
+ IrqLines *irqs)
{
MachineState *machine = MACHINE(pms);
const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
@@ -806,15 +806,14 @@ void ppce500_init(MachineState *machine)
/* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
* 4 respectively */
unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
- qemu_irq **irqs;
+ IrqLines *irqs;
DeviceState *dev, *mpicdev;
CPUPPCState *firstenv = NULL;
MemoryRegion *ccsr_addr_space;
SysBusDevice *s;
PPCE500CCSRState *ccsr;
- irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
- irqs[0] = g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+ irqs = g_new0(IrqLines, smp_cpus);
for (i = 0; i < smp_cpus; i++) {
PowerPCCPU *cpu;
CPUState *cs;
@@ -834,10 +833,9 @@ void ppce500_init(MachineState *machine)
firstenv = env;
}
- irqs[i] = irqs[0] + (i * OPENPIC_OUTPUT_NB);
input = (qemu_irq *)env->irq_inputs;
- irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
- irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
+ irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
+ irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 7e45afae7c..bb19eaba36 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -115,7 +115,7 @@ static void ppc_core99_init(MachineState *machine)
PowerPCCPU *cpu = NULL;
CPUPPCState *env = NULL;
char *filename;
- qemu_irq **openpic_irqs;
+ IrqLines *openpic_irqs;
int linux_boot, i, j, k;
MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1);
hwaddr kernel_base, initrd_base, cmdline_base = 0;
@@ -248,41 +248,37 @@ static void ppc_core99_init(MachineState *machine)
memory_region_add_subregion(get_system_memory(), 0xf8000000,
sysbus_mmio_get_region(s, 0));
- openpic_irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
- openpic_irqs[0] =
- g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+ openpic_irqs = g_new0(IrqLines, smp_cpus);
for (i = 0; i < smp_cpus; i++) {
/* Mac99 IRQ connection between OpenPIC outputs pins
* and PowerPC input pins
*/
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_6xx:
- openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
- openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP];
/* Not connected ? */
- openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
/* Check this */
- openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET];
break;
#if defined(TARGET_PPC64)
case PPC_FLAGS_INPUT_970:
- openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
- openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP];
/* Not connected ? */
- openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
/* Check this */
- openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET];
break;
#endif /* defined(TARGET_PPC64) */
@@ -299,7 +295,7 @@ static void ppc_core99_init(MachineState *machine)
k = 0;
for (i = 0; i < smp_cpus; i++) {
for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
- sysbus_connect_irq(s, k++, openpic_irqs[i][j]);
+ sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]);
}
}
g_free(openpic_irqs);
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index 1b0a0a8ba3..f47b15f10e 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -149,7 +149,7 @@ static void ref405ep_init(MachineState *machine)
MemoryRegion *bios;
MemoryRegion *sram = g_new(MemoryRegion, 1);
ram_addr_t bdloc;
- MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
hwaddr ram_bases[2], ram_sizes[2];
target_ulong sram_size;
long bios_size;
@@ -448,7 +448,7 @@ static void taihu_405ep_init(MachineState *machine)
qemu_irq *pic;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *bios;
- MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
MemoryRegion *ram = g_malloc0(sizeof(*ram));
hwaddr ram_bases[2], ram_sizes[2];
long bios_size;
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index 5c58415cf1..e1aadf126d 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -1519,7 +1519,7 @@ CPUPPCState *ppc405cr_init(MemoryRegion *address_space_mem,
/* OBP arbitrer */
ppc4xx_opba_init(0xef600600);
/* Universal interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] =
@@ -1877,7 +1877,7 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
/* Initialize timers */
ppc_booke_timers_init(cpu, sysclk, 0);
/* Universal interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] =
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index f5720f979e..b8aa55d526 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -169,8 +169,7 @@ static void bamboo_init(MachineState *machine)
unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *isa = g_new(MemoryRegion, 1);
- MemoryRegion *ram_memories
- = g_malloc(PPC440EP_SDRAM_NR_BANKS * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS);
hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS];
hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS];
qemu_irq *pic;
@@ -200,7 +199,7 @@ static void bamboo_init(MachineState *machine)
ppc_dcr_init(env, NULL, NULL);
/* interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
pic = ppcuic_init(env, irqs, 0x0C0, 0, 1);
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 5aac58f36e..4b051c0950 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -430,7 +430,7 @@ static void sam460ex_init(MachineState *machine)
ppc4xx_plb_init(env);
/* interrupt controllers */
- irqs = g_malloc0(sizeof(*irqs) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
uic[0] = ppcuic_init(env, irqs, 0xc0, 0, 1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 55be0f56cb..19a07c5c9d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -150,7 +150,7 @@ static void pre_2_10_vmstate_unregister_dummy_icp(int i)
(void *)(uintptr_t) i);
}
-static int xics_max_server_number(sPAPRMachineState *spapr)
+int spapr_max_server_number(sPAPRMachineState *spapr)
{
assert(spapr->vsmt);
return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads);
@@ -889,8 +889,6 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
/* ibm,associativity-lookup-arrays */
buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
cur_index = int_buf = g_malloc0(buf_len);
-
- cur_index = int_buf;
int_buf[0] = cpu_to_be32(nr_nodes);
int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
cur_index += 2;
@@ -1033,7 +1031,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
cpu_to_be32(0),
cpu_to_be32(0),
cpu_to_be32(0),
- cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0),
+ cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1),
};
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
@@ -1097,15 +1095,18 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
spapr_dt_rtas_tokens(fdt, rtas);
}
-/* Prepare ibm,arch-vec-5-platform-support, which indicates the MMU features
- * that the guest may request and thus the valid values for bytes 24..26 of
- * option vector 5: */
-static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
+/*
+ * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU
+ * and the XIVE features that the guest may request and thus the valid
+ * values for bytes 23..26 of option vector 5:
+ */
+static void spapr_dt_ov5_platform_support(sPAPRMachineState *spapr, void *fdt,
+ int chosen)
{
PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
char val[2 * 4] = {
- 23, 0x00, /* Xive mode, filled in below. */
+ 23, spapr->irq->ov5, /* Xive mode. */
24, 0x00, /* Hash/Radix, filled in below. */
25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
26, 0x40, /* Radix options: GTSE == yes. */
@@ -1113,7 +1114,11 @@ static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
first_ppc_cpu->compat_pvr)) {
- /* If we're in a pre POWER9 compat mode then the guest should do hash */
+ /*
+ * If we're in a pre POWER9 compat mode then the guest should
+ * do hash and use the legacy interrupt mode
+ */
+ val[1] = 0x00; /* XICS */
val[3] = 0x00; /* Hash */
} else if (kvm_enabled()) {
if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
@@ -1191,7 +1196,7 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
_FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
}
- spapr_dt_ov5_platform_support(fdt, chosen);
+ spapr_dt_ov5_platform_support(spapr, fdt, chosen);
g_free(stdout_path);
g_free(bootlist);
@@ -1270,7 +1275,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
- spapr_dt_xics(xics_max_server_number(spapr), fdt, PHANDLE_XICP);
+ spapr->irq->dt_populate(spapr, spapr_max_server_number(spapr), fdt,
+ PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1290,7 +1296,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
}
QLIST_FOREACH(phb, &spapr->phbs, list) {
- ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt, smc->irq->nr_msis);
+ ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt,
+ spapr->irq->nr_msis);
if (ret < 0) {
error_report("couldn't setup PCI devices in fdt");
exit(1);
@@ -1620,6 +1627,12 @@ static void spapr_machine_reset(void)
qemu_devices_reset();
+ /*
+ * This is fixing some of the default configuration of the XIVE
+ * devices. To be called after the reset of the machine devices.
+ */
+ spapr_irq_reset(spapr, &error_fatal);
+
/* DRC reset may cause a device to be unplugged. This will cause troubles
* if this device is used by another device (eg, a running vhost backend
* will crash QEMU if the DIMM holding the vring goes away). To avoid such
@@ -1731,14 +1744,6 @@ static int spapr_post_load(void *opaque, int version_id)
return err;
}
- if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
- CPUState *cs;
- CPU_FOREACH(cs) {
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- icp_resend(ICP(cpu->intc));
- }
- }
-
/* In earlier versions, there was no separate qdev for the PAPR
* RTC, so the RTC offset was stored directly in sPAPREnvironment.
* So when migrating from those versions, poke the incoming offset
@@ -1759,6 +1764,11 @@ static int spapr_post_load(void *opaque, int version_id)
}
}
+ err = spapr_irq_post_load(spapr, version_id);
+ if (err) {
+ return err;
+ }
+
return err;
}
@@ -2466,15 +2476,10 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
boot_cores_nr = possible_cpus->len;
}
- /* VSMT must be set in order to be able to compute VCPU ids, ie to
- * call xics_max_server_number() or spapr_vcpu_id().
- */
- spapr_set_vsmt_mode(spapr, &error_fatal);
-
if (smc->pre_2_10_has_unused_icps) {
int i;
- for (i = 0; i < xics_max_server_number(spapr); i++) {
+ for (i = 0; i < spapr_max_server_number(spapr); i++) {
/* Dummy entries get deregistered when real ICPState objects
* are registered during CPU core hotplug.
*/
@@ -2593,8 +2598,14 @@ static void spapr_machine_init(MachineState *machine)
/* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
+ /*
+ * VSMT must be set in order to be able to compute VCPU ids, ie to
+ * call spapr_max_server_number() or spapr_vcpu_id().
+ */
+ spapr_set_vsmt_mode(spapr, &error_fatal);
+
/* Set up Interrupt Controller before we create the VCPUs */
- smc->irq->init(spapr, &error_fatal);
+ spapr_irq_init(spapr, &error_fatal);
/* Set up containers for ibm,client-architecture-support negotiated options
*/
@@ -2621,6 +2632,17 @@ static void spapr_machine_init(MachineState *machine)
/* advertise support for ibm,dynamic-memory-v2 */
spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
+ /* advertise XIVE on POWER9 machines */
+ if (spapr->irq->ov5 & SPAPR_OV5_XIVE_EXPLOIT) {
+ if (ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
+ 0, spapr->max_compat_pvr)) {
+ spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
+ } else {
+ error_report("XIVE-only machines require a POWER9 CPU");
+ exit(1);
+ }
+ }
+
/* init CPUs */
spapr_init_cpus(spapr);
@@ -3031,9 +3053,38 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
visit_type_uint32(v, name, (uint32_t *)opaque, errp);
}
+static char *spapr_get_ic_mode(Object *obj, Error **errp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+ if (spapr->irq == &spapr_irq_xics_legacy) {
+ return g_strdup("legacy");
+ } else if (spapr->irq == &spapr_irq_xics) {
+ return g_strdup("xics");
+ } else if (spapr->irq == &spapr_irq_xive) {
+ return g_strdup("xive");
+ }
+ g_assert_not_reached();
+}
+
+static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+ /* The legacy IRQ backend can not be set */
+ if (strcmp(value, "xics") == 0) {
+ spapr->irq = &spapr_irq_xics;
+ } else if (strcmp(value, "xive") == 0) {
+ spapr->irq = &spapr_irq_xive;
+ } else {
+ error_setg(errp, "Bad value for \"ic-mode\" property");
+ }
+}
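
With the getter/setter above, the interrupt controller backend becomes a regular machine property. As an illustrative usage (not text from this patch), a pseries guest could then be started with something along the lines of:

    qemu-system-ppc64 -machine pseries,ic-mode=xive ...

with "ic-mode=xics" selecting the XICS backend instead.
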
+
static void spapr_instance_init(Object *obj)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
spapr->htab_fd = -1;
spapr->use_hotplug_event_source = true;
@@ -3067,6 +3118,14 @@ static void spapr_instance_init(Object *obj)
" the host's SMT mode", &error_abort);
object_property_add_bool(obj, "vfio-no-msix-emulation",
spapr_get_msix_emulation, NULL, NULL);
+
+ /* The machine class defines the default interrupt controller mode */
+ spapr->irq = smc->irq;
+ object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
+ spapr_set_ic_mode, NULL);
+ object_property_set_description(obj, "ic-mode",
+ "Specifies the interrupt controller mode (xics, xive)",
+ NULL);
}
static void spapr_machine_finalizefn(Object *obj)
@@ -3789,9 +3848,8 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
- smc->irq->print_info(spapr, mon);
+ spapr->irq->print_info(spapr, mon);
}
int spapr_get_vcpu_id(PowerPCCPU *cpu)
@@ -3873,7 +3931,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
hc->unplug = spapr_machine_device_unplug;
smc->dr_lmb_enabled = true;
- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
mc->has_hotpluggable_cpus = true;
smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
fwc->get_dev_path = spapr_get_fw_dev_path;
@@ -3970,6 +4028,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
{
spapr_machine_4_0_class_options(mc);
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_1);
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
}
DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 2398ce62c0..82666436e9 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -11,7 +11,6 @@
#include "hw/ppc/spapr_cpu_core.h"
#include "target/ppc/cpu.h"
#include "hw/ppc/spapr.h"
-#include "hw/ppc/xics.h" /* for icp_create() - to be removed */
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
@@ -233,8 +232,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
- cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
- &local_err);
+ cpu->intc = spapr->irq->cpu_intc_create(spapr, OBJECT(cpu), &local_err);
if (local_err) {
goto error_unregister;
}
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 1b0880ac9e..b56466f89a 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -93,7 +93,7 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
if (!table) {
*fd = -1;
- table = g_malloc0(nb_table * sizeof(uint64_t));
+ table = g_new0(uint64_t, nb_table);
}
trace_spapr_iommu_new_table(liobn, table, *fd);
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index e77b94cc68..7b3b5afec2 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -12,6 +12,7 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xics.h"
#include "sysemu/kvm.h"
@@ -93,15 +94,9 @@ error:
static void spapr_irq_init_xics(sPAPRMachineState *spapr, Error **errp)
{
MachineState *machine = MACHINE(spapr);
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
- int nr_irqs = smc->irq->nr_irqs;
+ int nr_irqs = spapr->irq->nr_irqs;
Error *local_err = NULL;
- /* Initialize the MSI IRQ allocator. */
- if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
- spapr_irq_msi_init(spapr, smc->irq->nr_msis);
- }
-
if (kvm_enabled()) {
if (machine_kernel_irqchip_allowed(machine) &&
!xics_kvm_init(spapr, &local_err)) {
@@ -195,6 +190,24 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
ics_pic_print_info(spapr->ics, mon);
}
+static Object *spapr_irq_cpu_intc_create_xics(sPAPRMachineState *spapr,
+ Object *cpu, Error **errp)
+{
+ return icp_create(cpu, spapr->icp_type, XICS_FABRIC(spapr), errp);
+}
+
+static int spapr_irq_post_load_xics(sPAPRMachineState *spapr, int version_id)
+{
+ if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ icp_resend(ICP(cpu->intc));
+ }
+ }
+ return 0;
+}
+
#define SPAPR_IRQ_XICS_NR_IRQS 0x1000
#define SPAPR_IRQ_XICS_NR_MSIS \
(XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -202,37 +215,184 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
sPAPRIrq spapr_irq_xics = {
.nr_irqs = SPAPR_IRQ_XICS_NR_IRQS,
.nr_msis = SPAPR_IRQ_XICS_NR_MSIS,
+ .ov5 = SPAPR_OV5_XIVE_LEGACY,
.init = spapr_irq_init_xics,
.claim = spapr_irq_claim_xics,
.free = spapr_irq_free_xics,
.qirq = spapr_qirq_xics,
.print_info = spapr_irq_print_info_xics,
+ .dt_populate = spapr_dt_xics,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+ .post_load = spapr_irq_post_load_xics,
+};
+
+/*
+ * XIVE IRQ backend.
+ */
+static void spapr_irq_init_xive(sPAPRMachineState *spapr, Error **errp)
+{
+ MachineState *machine = MACHINE(spapr);
+ uint32_t nr_servers = spapr_max_server_number(spapr);
+ DeviceState *dev;
+ int i;
+
+ /* KVM XIVE device not yet available */
+ if (kvm_enabled()) {
+ if (machine_kernel_irqchip_required(machine)) {
+ error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
+ return;
+ }
+ }
+
+ dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
+ qdev_prop_set_uint32(dev, "nr-irqs", spapr->irq->nr_irqs);
+ /*
+ * 8 XIVE END structures per CPU. One for each available priority
+ */
+ qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
+ qdev_init_nofail(dev);
+
+ spapr->xive = SPAPR_XIVE(dev);
+
+ /* Enable the CPU IPIs */
+ for (i = 0; i < nr_servers; ++i) {
+ spapr_xive_irq_claim(spapr->xive, SPAPR_IRQ_IPI + i, false);
+ }
+
+ spapr_xive_hcall_init(spapr);
+}
+
+static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
+ Error **errp)
+{
+ if (!spapr_xive_irq_claim(spapr->xive, irq, lsi)) {
+ error_setg(errp, "IRQ %d is invalid", irq);
+ return -1;
+ }
+ return 0;
+}
+
+static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
+{
+ int i;
+
+ for (i = irq; i < irq + num; ++i) {
+ spapr_xive_irq_free(spapr->xive, i);
+ }
+}
+
+static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
+{
+ return spapr_xive_qirq(spapr->xive, irq);
+}
+
+static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
+ Monitor *mon)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
+ }
+
+ spapr_xive_pic_print_info(spapr->xive, mon);
+}
+
+static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
+ Object *cpu, Error **errp)
+{
+ Object *obj = xive_tctx_create(cpu, XIVE_ROUTER(spapr->xive), errp);
+
+ /*
+ * (TCG) Set the OS CAM line early for hotplugged CPUs, as they do
+ * not benefit from the reset of the XIVE IRQ backend
+ */
+ spapr_xive_set_tctx_os_cam(XIVE_TCTX(obj));
+ return obj;
+}
+
+static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
+{
+ return 0;
+}
+
+static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ /* (TCG) Set the OS CAM line of the thread interrupt context. */
+ spapr_xive_set_tctx_os_cam(XIVE_TCTX(cpu->intc));
+ }
+}
+
+/*
+ * XIVE uses the full IRQ number space. Set it to 8K to be compatible
+ * with XICS.
+ */
+
+#define SPAPR_IRQ_XIVE_NR_IRQS 0x2000
+#define SPAPR_IRQ_XIVE_NR_MSIS (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
+
+sPAPRIrq spapr_irq_xive = {
+ .nr_irqs = SPAPR_IRQ_XIVE_NR_IRQS,
+ .nr_msis = SPAPR_IRQ_XIVE_NR_MSIS,
+ .ov5 = SPAPR_OV5_XIVE_EXPLOIT,
+
+ .init = spapr_irq_init_xive,
+ .claim = spapr_irq_claim_xive,
+ .free = spapr_irq_free_xive,
+ .qirq = spapr_qirq_xive,
+ .print_info = spapr_irq_print_info_xive,
+ .dt_populate = spapr_dt_xive,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xive,
+ .post_load = spapr_irq_post_load_xive,
+ .reset = spapr_irq_reset_xive,
};
/*
* sPAPR IRQ frontend routines for devices
*/
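+/*
+ * Common entry point: initializes the MSI IRQ allocator and then the
+ * XICS or XIVE backend selected through spapr->irq.
+ */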
+void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
+{
+ /* Initialize the MSI IRQ allocator. */
+ if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+ spapr_irq_msi_init(spapr, spapr->irq->nr_msis);
+ }
+
+ spapr->irq->init(spapr, errp);
+}
int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
- return smc->irq->claim(spapr, irq, lsi, errp);
+ return spapr->irq->claim(spapr, irq, lsi, errp);
}
void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
- smc->irq->free(spapr, irq, num);
+ spapr->irq->free(spapr, irq, num);
}
qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ return spapr->irq->qirq(spapr, irq);
+}
+
+int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id)
+{
+ return spapr->irq->post_load(spapr, version_id);
+}
- return smc->irq->qirq(spapr, irq);
+void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp)
+{
+ if (spapr->irq->reset) {
+ spapr->irq->reset(spapr, errp);
+ }
}
/*
@@ -295,10 +455,14 @@ int spapr_irq_find(sPAPRMachineState *spapr, int num, bool align, Error **errp)
sPAPRIrq spapr_irq_xics_legacy = {
.nr_irqs = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
.nr_msis = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
+ .ov5 = SPAPR_OV5_XIVE_LEGACY,
.init = spapr_irq_init_xics,
.claim = spapr_irq_claim_xics,
.free = spapr_irq_free_xics,
.qirq = spapr_qirq_xics,
.print_info = spapr_irq_print_info_xics,
+ .dt_populate = spapr_dt_xics,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+ .post_load = spapr_irq_post_load_xics,
};
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 2374d55fc1..bfb02ee96b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1370,18 +1370,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
/* Callback to be called during DRC release. */
void spapr_phb_remove_pci_device_cb(DeviceState *dev)
{
- /* some version guests do not wait for completion of a device
- * cleanup (generally done asynchronously by the kernel) before
- * signaling to QEMU that the device is safe, but instead sleep
- * for some 'safe' period of time. unfortunately on a busy host
- * this sleep isn't guaranteed to be long enough, resulting in
- * bad things like IRQ lines being left asserted during final
- * device removal. to deal with this we call reset just prior
- * to finalizing the device, which will put the device back into
- * an 'idle' state, as the device cleanup code expects.
- */
- pci_device_reset(PCI_DEVICE(dev));
- object_unparent(OBJECT(dev));
+ HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+
+ hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
}
static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
@@ -1490,6 +1481,23 @@ out:
}
}
+static void spapr_pci_unplug(HotplugHandler *plug_handler,
+ DeviceState *plugged_dev, Error **errp)
+{
+ /* some version guests do not wait for completion of a device
+ * cleanup (generally done asynchronously by the kernel) before
+ * signaling to QEMU that the device is safe, but instead sleep
+ * for some 'safe' period of time. unfortunately on a busy host
+ * this sleep isn't guaranteed to be long enough, resulting in
+ * bad things like IRQ lines being left asserted during final
+ * device removal. to deal with this we call reset just prior
+ * to finalizing the device, which will put the device back into
+ * an 'idle' state, as the device cleanup code expects.
+ */
+ pci_device_reset(PCI_DEVICE(plugged_dev));
+ object_unparent(OBJECT(plugged_dev));
+}
+
static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
DeviceState *plugged_dev, Error **errp)
{
@@ -1965,6 +1973,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
dc->user_creatable = true;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
hp->plug = spapr_pci_plug;
+ hp->unplug = spapr_pci_unplug;
hp->unplug_request = spapr_pci_unplug_request;
}
diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c
index 329feb148f..cb8a410359 100644
--- a/hw/ppc/spapr_rtas_ddw.c
+++ b/hw/ppc/spapr_rtas_ddw.c
@@ -96,9 +96,8 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
- uint64_t buid, max_window_size;
+ uint64_t buid;
uint32_t avail, addr, pgmask = 0;
- MachineState *machine = MACHINE(spapr);
if ((nargs != 3) || (nret != 5)) {
goto param_error_exit;
@@ -114,27 +113,15 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
/* Translate page mask to LoPAPR format */
pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask);
- /*
- * This is "Largest contiguous block of TCEs allocated specifically
- * for (that is, are reserved for) this PE".
- * Return the maximum number as maximum supported RAM size was in 4K pages.
- */
- if (machine->ram_size == machine->maxram_size) {
- max_window_size = machine->ram_size;
- } else {
- max_window_size = machine->device_memory->base +
- memory_region_size(&machine->device_memory->mr);
- }
-
avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, avail);
- rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT);
+ rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */
rtas_st(rets, 3, pgmask);
rtas_st(rets, 4, 0); /* DMA migration mask, not supported */
- trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask);
+ trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask);
return;
param_error_exit:
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 840d4a3c45..7e8a9ad093 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -730,7 +730,7 @@ void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt)
}
/* Copy out into an array of pointers */
- qdevs = g_malloc(sizeof(qdev) * num);
+ qdevs = g_new(DeviceState *, num);
num = 0;
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
qdevs[num++] = kid->child;
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index ee9b4b4490..5177120574 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -105,7 +105,7 @@ static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
ppc_dcr_init(env, NULL, NULL);
/* interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
ppcuic_init(env, irqs, 0x0C0, 0, 1);
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d7a4bbd91f..c28bfbd44d 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -15,10 +15,18 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "sysemu/sysemu.h"
#include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qapi-events-rdma.h"
#include <infiniband/verbs.h>
+#include <infiniband/umad_types.h>
+#include <infiniband/umad.h>
+#include <rdma/rdma_user_cm.h>
+#include "contrib/rdmacm-mux/rdmacm-mux.h"
#include "trace.h"
#include "rdma_utils.h"
#include "rdma_rm.h"
@@ -29,27 +37,46 @@
#define VENDOR_ERR_TOO_MANY_SGES 0x202
#define VENDOR_ERR_NOMEM 0x203
#define VENDOR_ERR_QP0 0x204
-#define VENDOR_ERR_NO_SGE 0x205
+#define VENDOR_ERR_INV_NUM_SGE 0x205
#define VENDOR_ERR_MAD_SEND 0x206
#define VENDOR_ERR_INVLKEY 0x207
#define VENDOR_ERR_MR_SMALL 0x208
+#define VENDOR_ERR_INV_MAD_BUFF 0x209
#define THR_NAME_LEN 16
#define THR_POLL_TO 5000
+#define MAD_HDR_SIZE sizeof(struct ibv_grh)
+
typedef struct BackendCtx {
- uint64_t req_id;
void *up_ctx;
bool is_tx_req;
+ struct ibv_sge sge; /* Used to save MAD recv buffer */
} BackendCtx;
-static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);
+struct backend_umad {
+ struct ib_user_mad hdr;
+ char mad[RDMA_MAX_PRIVATE_DATA];
+};
+
+static void (*comp_handler)(void *ctx, struct ibv_wc *wc);
-static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx)
+static void dummy_comp_handler(void *ctx, struct ibv_wc *wc)
{
pr_err("No completion handler is registered\n");
}
+static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err,
+ void *ctx)
+{
+ struct ibv_wc wc = {0};
+
+ wc.status = status;
+ wc.vendor_err = vendor_err;
+
+ comp_handler(ctx, &wc);
+}
+
static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
{
int i, ne;
@@ -74,7 +101,7 @@ static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
}
pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv");
- comp_handler(wc[i].status, wc[i].vendor_err, bctx->up_ctx);
+ comp_handler(bctx->up_ctx, &wc[i]);
rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
g_free(bctx);
@@ -146,6 +173,77 @@ static void *comp_handler_thread(void *arg)
return NULL;
}
+static inline void disable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
+{
+ atomic_set(&backend_dev->rdmacm_mux.can_receive, 0);
+}
+
+static inline void enable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
+{
+ atomic_set(&backend_dev->rdmacm_mux.can_receive, sizeof(RdmaCmMuxMsg));
+}
+
+static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev)
+{
+ return atomic_read(&backend_dev->rdmacm_mux.can_receive);
+}
+
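+/* Read and validate the mux's synchronous response to the request just sent */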
+static int check_mux_op_status(CharBackend *mad_chr_be)
+{
+ RdmaCmMuxMsg msg = {0};
+ int ret;
+
+ pr_dbg("Reading response\n");
+ ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg));
+ return -EIO;
+ }
+
+ pr_dbg("msg_type=%d\n", msg.hdr.msg_type);
+ pr_dbg("op_code=%d\n", msg.hdr.op_code);
+ pr_dbg("err_code=%d\n", msg.hdr.err_code);
+
+ if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) {
+ pr_dbg("Invalid message type %d\n", msg.hdr.msg_type);
+ return -EIO;
+ }
+
+ if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) {
+ pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg)
+{
+ int rc = 0;
+
+ pr_dbg("Executing request %d\n", msg->hdr.op_code);
+
+ msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
+ disable_rdmacm_mux_async(backend_dev);
+ rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be,
+ (const uint8_t *)msg, sizeof(*msg));
+ if (rc != sizeof(*msg)) {
+ enable_rdmacm_mux_async(backend_dev);
+ pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", rc);
+ return -EIO;
+ }
+
+ rc = check_mux_op_status(backend_dev->rdmacm_mux.chr_be);
+ if (rc) {
+ pr_dbg("Fail to execute rdmacm_mux request %d (rc=%d)\n",
+ msg->hdr.op_code, rc);
+ }
+
+ enable_rdmacm_mux_async(backend_dev);
+
+ return rc;
+}
+
static void stop_backend_thread(RdmaBackendThread *thread)
{
thread->run = false;
@@ -168,8 +266,8 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
}
-void rdma_backend_register_comp_handler(void (*handler)(int status,
- unsigned int vendor_err, void *ctx))
+void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
+ struct ibv_wc *wc))
{
comp_handler = handler;
}
@@ -286,11 +384,73 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
return 0;
}
+static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx,
+ union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge)
+{
+ RdmaCmMuxMsg msg = {0};
+ char *hdr, *data;
+ int ret;
+
+ pr_dbg("num_sge=%d\n", num_sge);
+
+ if (num_sge != 2) {
+ return -EINVAL;
+ }
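+ /* sge[0] holds the MAD header, sge[1] the payload; both are copied into a single umad buffer below */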
+
+ msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD;
+ memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid));
+
+ msg.umad_len = sge[0].length + sge[1].length;
+ pr_dbg("umad_len=%d\n", msg.umad_len);
+
+ if (msg.umad_len > sizeof(msg.umad.mad)) {
+ return -ENOMEM;
+ }
+
+ msg.umad.hdr.addr.qpn = htobe32(1);
+ msg.umad.hdr.addr.grh_present = 1;
+ pr_dbg("sgid_idx=%d\n", sgid_idx);
+ pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
+ msg.umad.hdr.addr.gid_index = sgid_idx;
+ memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid));
+ msg.umad.hdr.addr.hop_limit = 0xFF;
+
+ hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);
+ if (!hdr) {
+ pr_dbg("Fail to map to sge[0]\n");
+ return -ENOMEM;
+ }
+ data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+ if (!data) {
+ pr_dbg("Fail to map to sge[1]\n");
+ rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+ return -ENOMEM;
+ }
+
+ pr_dbg_buf("mad_hdr", hdr, sge[0].length);
+ pr_dbg_buf("mad_data", data, sge[1].length);
+
+ memcpy(&msg.umad.mad[0], hdr, sge[0].length);
+ memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length);
+
+ rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length);
+ rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+
+ ret = exec_rdmacm_mux_req(backend_dev, &msg);
+ if (ret) {
+ pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret);
+ return -EIO;
+ }
+
+ return 0;
+}
+
void rdma_backend_post_send(RdmaBackendDev *backend_dev,
RdmaBackendQP *qp, uint8_t qp_type,
struct ibv_sge *sge, uint32_t num_sge,
- union ibv_gid *dgid, uint32_t dqpn,
- uint32_t dqkey, void *ctx)
+ uint8_t sgid_idx, union ibv_gid *sgid,
+ union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
+ void *ctx)
{
BackendCtx *bctx;
struct ibv_sge new_sge[MAX_SGE];
@@ -301,19 +461,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
if (qp_type == IBV_QPT_SMI) {
pr_dbg("QP0 unsupported\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
} else if (qp_type == IBV_QPT_GSI) {
pr_dbg("QP1\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+ rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge);
+ if (rc) {
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+ } else {
+ complete_work(IBV_WC_SUCCESS, 0, ctx);
+ }
}
- pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type);
return;
}
pr_dbg("num_sge=%d\n", num_sge);
- if (!num_sge) {
- pr_dbg("num_sge=0\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
+ if (!num_sge || num_sge > MAX_SGE) {
+ pr_dbg("invalid num_sge=%d\n", num_sge);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx);
return;
}
@@ -324,20 +488,23 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
if (unlikely(rc)) {
pr_dbg("Failed to allocate cqe_ctx\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
goto out_free_bctx;
}
rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge);
if (rc) {
pr_dbg("Error: Failed to build host SGE array\n");
- comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
goto out_dealloc_cqe_ctx;
}
if (qp_type == IBV_QPT_UD) {
- wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd,
- backend_dev->backend_gid_idx, dgid);
+ wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid);
+ if (!wr.wr.ud.ah) {
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+ goto out_dealloc_cqe_ctx;
+ }
wr.wr.ud.remote_qpn = dqpn;
wr.wr.ud.remote_qkey = dqkey;
}
@@ -353,7 +520,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
if (rc) {
pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno,
qp->ibqp->qp_num);
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
goto out_dealloc_cqe_ctx;
}
@@ -366,6 +533,48 @@ out_free_bctx:
g_free(bctx);
}
+static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev,
+ struct ibv_sge *sge, uint32_t num_sge,
+ void *ctx)
+{
+ BackendCtx *bctx;
+ int rc;
+ uint32_t bctx_id;
+
+ if (num_sge != 1) {
+ pr_dbg("Invalid num_sge (%d), expecting 1\n", num_sge);
+ return VENDOR_ERR_INV_NUM_SGE;
+ }
+
+ if (sge[0].length < RDMA_MAX_PRIVATE_DATA + sizeof(struct ibv_grh)) {
+ pr_dbg("Too small buffer for MAD\n");
+ return VENDOR_ERR_INV_MAD_BUFF;
+ }
+
+ pr_dbg("addr=0x%" PRIx64"\n", sge[0].addr);
+ pr_dbg("length=%d\n", sge[0].length);
+ pr_dbg("lkey=%d\n", sge[0].lkey);
+
+ bctx = g_malloc0(sizeof(*bctx));
+
+ rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
+ if (unlikely(rc)) {
+ g_free(bctx);
+ pr_dbg("Fail to allocate cqe_ctx\n");
+ return VENDOR_ERR_NOMEM;
+ }
+
+ pr_dbg("bctx_id %d, bctx %p, ctx %p\n", bctx_id, bctx, ctx);
+ bctx->up_ctx = ctx;
+ bctx->sge = *sge;
+
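+ /* Queue the guest buffer; it is consumed when a MAD arrives from the rdmacm_mux */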
+ qemu_mutex_lock(&backend_dev->recv_mads_list.lock);
+ qlist_append_int(backend_dev->recv_mads_list.list, bctx_id);
+ qemu_mutex_unlock(&backend_dev->recv_mads_list.lock);
+
+ return 0;
+}
+
void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
RdmaDeviceResources *rdma_dev_res,
RdmaBackendQP *qp, uint8_t qp_type,
@@ -380,19 +589,22 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
if (qp_type == IBV_QPT_SMI) {
pr_dbg("QP0 unsupported\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
}
if (qp_type == IBV_QPT_GSI) {
pr_dbg("QP1\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+ rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx);
+ if (rc) {
+ complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
+ }
}
return;
}
pr_dbg("num_sge=%d\n", num_sge);
- if (!num_sge) {
- pr_dbg("num_sge=0\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
+ if (!num_sge || num_sge > MAX_SGE) {
+ pr_dbg("invalid num_sge=%d\n", num_sge);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_NUM_SGE, ctx);
return;
}
@@ -403,14 +615,14 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx);
if (unlikely(rc)) {
pr_dbg("Failed to allocate cqe_ctx\n");
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
goto out_free_bctx;
}
rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge);
if (rc) {
pr_dbg("Error: Failed to build host SGE array\n");
- comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
goto out_dealloc_cqe_ctx;
}
@@ -422,7 +634,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
if (rc) {
pr_dbg("Fail (%d, %d) to post recv WQE to qpn %d\n", rc, errno,
qp->ibqp->qp_num);
- comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
goto out_dealloc_cqe_ctx;
}
@@ -513,7 +725,6 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
switch (qp_type) {
case IBV_QPT_GSI:
- pr_dbg("QP1 unsupported\n");
return 0;
case IBV_QPT_RC:
@@ -594,9 +805,9 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
}
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
- uint8_t qp_type, union ibv_gid *dgid,
- uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
- bool use_qkey)
+ uint8_t qp_type, uint8_t sgid_idx,
+ union ibv_gid *dgid, uint32_t dqpn,
+ uint32_t rq_psn, uint32_t qkey, bool use_qkey)
{
struct ibv_qp_attr attr = {0};
union ibv_gid ibv_gid = {
@@ -608,13 +819,15 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
attr.qp_state = IBV_QPS_RTR;
attr_mask = IBV_QP_STATE;
+ qp->sgid_idx = sgid_idx;
+
switch (qp_type) {
case IBV_QPT_RC:
pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n",
be64_to_cpu(ibv_gid.global.subnet_prefix),
be64_to_cpu(ibv_gid.global.interface_id));
pr_dbg("dqpn=0x%x\n", dqpn);
- pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx);
+ pr_dbg("sgid_idx=%d\n", qp->sgid_idx);
pr_dbg("sport_num=%d\n", backend_dev->port_num);
pr_dbg("rq_psn=0x%x\n", rq_psn);
@@ -626,7 +839,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
attr.ah_attr.is_global = 1;
attr.ah_attr.grh.hop_limit = 1;
attr.ah_attr.grh.dgid = ibv_gid;
- attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx;
+ attr.ah_attr.grh.sgid_index = qp->sgid_idx;
attr.rq_psn = rq_psn;
attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
@@ -635,8 +848,8 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
break;
case IBV_QPT_UD:
+ pr_dbg("qkey=0x%x\n", qkey);
if (use_qkey) {
- pr_dbg("qkey=0x%x\n", qkey);
attr.qkey = qkey;
attr_mask |= IBV_QP_QKEY;
}
@@ -744,23 +957,224 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
return 0;
}
+static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid,
+ union ibv_gid *my_gid, int paylen)
+{
+ grh->paylen = htons(paylen);
+ grh->sgid = *sgid;
+ grh->dgid = *my_gid;
+
+ pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen);
+ pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id);
+ pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
+}
+
+static void process_incoming_mad_req(RdmaBackendDev *backend_dev,
+ RdmaCmMuxMsg *msg)
+{
+ QObject *o_ctx_id;
+ unsigned long cqe_ctx_id;
+ BackendCtx *bctx;
+ char *mad;
+
+ pr_dbg("umad_len=%d\n", msg->umad_len);
+
+#ifdef PVRDMA_DEBUG
+ struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad;
+ pr_dbg("bv %x cls %x cv %x mtd %x st %d tid %" PRIx64 " at %x atm %x\n",
+ hdr->base_version, hdr->mgmt_class, hdr->class_version,
+ hdr->method, hdr->status, be64toh(hdr->tid),
+ hdr->attr_id, hdr->attr_mod);
+#endif
+
+ qemu_mutex_lock(&backend_dev->recv_mads_list.lock);
+ o_ctx_id = qlist_pop(backend_dev->recv_mads_list.list);
+ qemu_mutex_unlock(&backend_dev->recv_mads_list.lock);
+ if (!o_ctx_id) {
+ pr_dbg("No more free MADs buffers, waiting for a while\n");
+ sleep(THR_POLL_TO);
+ return;
+ }
+
+ cqe_ctx_id = qnum_get_uint(qobject_to(QNum, o_ctx_id));
+ bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id);
+ if (unlikely(!bctx)) {
+ pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id);
+ return;
+ }
+
+ pr_dbg("id %ld, bctx %p, ctx %p\n", cqe_ctx_id, bctx, bctx->up_ctx);
+
+ mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr,
+ bctx->sge.length);
+ if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) {
+ complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF,
+ bctx->up_ctx);
+ } else {
+ struct ibv_wc wc = {0};
+ pr_dbg_buf("mad", msg->umad.mad, msg->umad_len);
+ memset(mad, 0, bctx->sge.length);
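+ /* Prepend a GRH so the guest sees the usual 40-byte header ahead of the MAD payload */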
+ build_mad_hdr((struct ibv_grh *)mad,
+ (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid,
+ msg->umad_len);
+ memcpy(&mad[MAD_HDR_SIZE], msg->umad.mad, msg->umad_len);
+ rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length);
+
+ wc.byte_len = msg->umad_len;
+ wc.status = IBV_WC_SUCCESS;
+ wc.wc_flags = IBV_WC_GRH;
+ comp_handler(bctx->up_ctx, &wc);
+ }
+
+ g_free(bctx);
+ rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id);
+}
+
+static inline int rdmacm_mux_can_receive(void *opaque)
+{
+ RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
+
+ return rdmacm_mux_can_process_async(backend_dev);
+}
+
+static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size)
+{
+ RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
+ RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf;
+
+ pr_dbg("Got %d bytes\n", size);
+ pr_dbg("msg_type=%d\n", msg->hdr.msg_type);
+ pr_dbg("op_code=%d\n", msg->hdr.op_code);
+
+ if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ ||
+ msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) {
+ pr_dbg("Error: Not a MAD request, skipping\n");
+ return;
+ }
+ process_incoming_mad_req(backend_dev, msg);
+}
+
+static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be)
+{
+ int ret;
+
+ backend_dev->rdmacm_mux.chr_be = mad_chr_be;
+
+ ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be);
+ if (!ret) {
+ pr_dbg("Missing chardev for MAD multiplexer\n");
+ return -EIO;
+ }
+
+ qemu_mutex_init(&backend_dev->recv_mads_list.lock);
+ backend_dev->recv_mads_list.list = qlist_new();
+
+ enable_rdmacm_mux_async(backend_dev);
+
+ qemu_chr_fe_set_handlers(backend_dev->rdmacm_mux.chr_be,
+ rdmacm_mux_can_receive, rdmacm_mux_read, NULL,
+ NULL, backend_dev, NULL, true);
+
+ return 0;
+}
+
+static void mad_fini(RdmaBackendDev *backend_dev)
+{
+ pr_dbg("Stopping MAD\n");
+ disable_rdmacm_mux_async(backend_dev);
+ qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be);
+ qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list));
+ qemu_mutex_destroy(&backend_dev->recv_mads_list.lock);
+}
+
+int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
+ union ibv_gid *gid)
+{
+ union ibv_gid sgid;
+ int ret;
+ int i = 0;
+
+ pr_dbg("0x%llx, 0x%llx\n",
+ (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
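+ /* Walk the backend device's GID table until the requested GID is found */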
+ do {
+ ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i,
+ &sgid);
+ i++;
+ } while (!ret && (memcmp(&sgid, gid, sizeof(*gid))));
+
+ pr_dbg("gid_index=%d\n", i - 1);
+
+ return ret ? ret : i - 1;
+}
+
+int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
+ union ibv_gid *gid)
+{
+ RdmaCmMuxMsg msg = {0};
+ int ret;
+
+ pr_dbg("0x%llx, 0x%llx\n",
+ (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
+ msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG;
+ memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
+
+ ret = exec_rdmacm_mux_req(backend_dev, &msg);
+ if (ret) {
+ pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret);
+ return -EIO;
+ }
+
+ qapi_event_send_rdma_gid_status_changed(ifname, true,
+ gid->global.subnet_prefix,
+ gid->global.interface_id);
+
+ return ret;
+}
+
+int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
+ union ibv_gid *gid)
+{
+ RdmaCmMuxMsg msg = {0};
+ int ret;
+
+ pr_dbg("0x%llx, 0x%llx\n",
+ (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(gid->global.interface_id));
+
+ msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG;
+ memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
+
+ ret = exec_rdmacm_mux_req(backend_dev, &msg);
+ if (ret) {
+ pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret);
+ return -EIO;
+ }
+
+ qapi_event_send_rdma_gid_status_changed(ifname, false,
+ gid->global.subnet_prefix,
+ gid->global.interface_id);
+
+ return 0;
+}
+
int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
RdmaDeviceResources *rdma_dev_res,
const char *backend_device_name, uint8_t port_num,
- uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
+ struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
Error **errp)
{
int i;
int ret = 0;
int num_ibv_devices;
struct ibv_device **dev_list;
- struct ibv_port_attr port_attr;
memset(backend_dev, 0, sizeof(*backend_dev));
backend_dev->dev = pdev;
-
- backend_dev->backend_gid_idx = backend_gid_idx;
backend_dev->port_num = port_num;
backend_dev->rdma_dev_res = rdma_dev_res;
@@ -797,9 +1211,9 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
backend_dev->ib_dev = *dev_list;
}
- pr_dbg("Using backend device %s, port %d, gid_idx %d\n",
- ibv_get_device_name(backend_dev->ib_dev),
- backend_dev->port_num, backend_dev->backend_gid_idx);
+ pr_dbg("Using backend device %s, port %d\n",
+ ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num);
+ pr_dbg("uverb device %s\n", backend_dev->ib_dev->dev_name);
backend_dev->context = ibv_open_device(backend_dev->ib_dev);
if (!backend_dev->context) {
@@ -816,20 +1230,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
}
pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel);
- ret = ibv_query_port(backend_dev->context, backend_dev->port_num,
- &port_attr);
- if (ret) {
- error_setg(errp, "Error %d from ibv_query_port", ret);
- ret = -EIO;
- goto out_destroy_comm_channel;
- }
-
- if (backend_dev->backend_gid_idx >= port_attr.gid_tbl_len) {
- error_setg(errp, "Invalid backend_gid_idx, should be less than %d",
- port_attr.gid_tbl_len);
- goto out_destroy_comm_channel;
- }
-
ret = init_device_caps(backend_dev, dev_attr);
if (ret) {
error_setg(errp, "Failed to initialize device capabilities");
@@ -837,18 +1237,13 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
goto out_destroy_comm_channel;
}
- ret = ibv_query_gid(backend_dev->context, backend_dev->port_num,
- backend_dev->backend_gid_idx, &backend_dev->gid);
+
+ ret = mad_init(backend_dev, mad_chr_be);
if (ret) {
- error_setg(errp, "Failed to query gid %d",
- backend_dev->backend_gid_idx);
+ error_setg(errp, "Fail to initialize mad");
ret = -EIO;
goto out_destroy_comm_channel;
}
- pr_dbg("subnet_prefix=0x%" PRIx64 "\n",
- be64_to_cpu(backend_dev->gid.global.subnet_prefix));
- pr_dbg("interface_id=0x%" PRIx64 "\n",
- be64_to_cpu(backend_dev->gid.global.interface_id));
backend_dev->comp_thread.run = false;
backend_dev->comp_thread.is_running = false;
@@ -886,6 +1281,7 @@ void rdma_backend_stop(RdmaBackendDev *backend_dev)
void rdma_backend_fini(RdmaBackendDev *backend_dev)
{
rdma_backend_stop(backend_dev);
+ mad_fini(backend_dev);
g_hash_table_destroy(ah_hash);
ibv_destroy_comp_channel(backend_dev->channel);
ibv_close_device(backend_dev->context);
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 86e8fe8ab6..8cae40f827 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -17,6 +17,8 @@
#define RDMA_BACKEND_H
#include "qapi/error.h"
+#include "chardev/char-fe.h"
+
#include "rdma_rm_defs.h"
#include "rdma_backend_defs.h"
@@ -26,14 +28,9 @@ enum ibv_special_qp_type {
IBV_QPT_GSI = 1,
};
-static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev)
-{
- return &dev->gid;
-}
-
static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)
{
- return qp->ibqp ? qp->ibqp->qp_num : 0;
+ return qp->ibqp ? qp->ibqp->qp_num : 1; /* the GSI QP (QP1) has no backend ibqp */
}
static inline uint32_t rdma_backend_mr_lkey(const RdmaBackendMR *mr)
@@ -49,13 +46,19 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
RdmaDeviceResources *rdma_dev_res,
const char *backend_device_name, uint8_t port_num,
- uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
+ struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
Error **errp);
void rdma_backend_fini(RdmaBackendDev *backend_dev);
+int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
+ union ibv_gid *gid);
+int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
+ union ibv_gid *gid);
+int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
+ union ibv_gid *gid);
void rdma_backend_start(RdmaBackendDev *backend_dev);
void rdma_backend_stop(RdmaBackendDev *backend_dev);
-void rdma_backend_register_comp_handler(void (*handler)(int status,
- unsigned int vendor_err, void *ctx));
+void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
+ struct ibv_wc *wc));
void rdma_backend_unregister_comp_handler(void);
int rdma_backend_query_port(RdmaBackendDev *backend_dev,
@@ -80,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
uint8_t qp_type, uint32_t qkey);
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
- uint8_t qp_type, union ibv_gid *dgid,
- uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
- bool use_qkey);
+ uint8_t qp_type, uint8_t sgid_idx,
+ union ibv_gid *dgid, uint32_t dqpn,
+ uint32_t rq_psn, uint32_t qkey, bool use_qkey);
int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
uint32_t sq_psn, uint32_t qkey, bool use_qkey);
int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
@@ -92,6 +95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp);
void rdma_backend_post_send(RdmaBackendDev *backend_dev,
RdmaBackendQP *qp, uint8_t qp_type,
struct ibv_sge *sge, uint32_t num_sge,
+ uint8_t sgid_idx, union ibv_gid *sgid,
union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
void *ctx);
void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index 7404f64002..1e5c3dd3bf 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -16,8 +16,10 @@
#ifndef RDMA_BACKEND_DEFS_H
#define RDMA_BACKEND_DEFS_H
-#include <infiniband/verbs.h>
#include "qemu/thread.h"
+#include "chardev/char-fe.h"
+#include <infiniband/verbs.h>
+#include "contrib/rdmacm-mux/rdmacm-mux.h"
typedef struct RdmaDeviceResources RdmaDeviceResources;
@@ -28,17 +30,27 @@ typedef struct RdmaBackendThread {
bool is_running; /* Set by the thread to report its status */
} RdmaBackendThread;
+typedef struct RecvMadList {
+ QemuMutex lock;
+ QList *list;
+} RecvMadList;
+
+typedef struct RdmaCmMux {
+ CharBackend *chr_be;
+ int can_receive;
+} RdmaCmMux;
+
typedef struct RdmaBackendDev {
struct ibv_device_attr dev_attr;
RdmaBackendThread comp_thread;
- union ibv_gid gid;
PCIDevice *dev;
RdmaDeviceResources *rdma_dev_res;
struct ibv_device *ib_dev;
struct ibv_context *context;
struct ibv_comp_channel *channel;
uint8_t port_num;
- uint8_t backend_gid_idx;
+ RecvMadList recv_mads_list;
+ RdmaCmMux rdmacm_mux;
} RdmaBackendDev;
typedef struct RdmaBackendPD {
@@ -58,6 +70,7 @@ typedef struct RdmaBackendCQ {
typedef struct RdmaBackendQP {
struct ibv_pd *ibpd;
struct ibv_qp *ibqp;
+ uint8_t sgid_idx;
} RdmaBackendQP;
#endif
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 8d59a42cd1..f5b1295890 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -43,7 +43,7 @@ static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
qemu_mutex_destroy(&tbl->lock);
g_free(tbl->tbl);
- bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
+ g_free(tbl->bitmap);
}
static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
@@ -263,7 +263,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
}
cq->opaque = opaque;
- cq->notify = false;
+ cq->notify = CNT_CLEAR;
rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
if (rc) {
@@ -291,7 +291,10 @@ void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
return;
}
- cq->notify = notify;
+ if (cq->notify != CNT_SET) {
+ cq->notify = notify ? CNT_ARM : CNT_CLEAR;
+ }
+
pr_dbg("notify=%d\n", cq->notify);
}
@@ -349,6 +352,11 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
return -EINVAL;
}
+ if (qp_type == IBV_QPT_GSI) {
+ scq->notify = CNT_SET;
+ rcq->notify = CNT_SET;
+ }
+
qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
if (!qp) {
return -ENOMEM;
@@ -383,7 +391,7 @@ out_dealloc_qp:
}
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
- uint32_t qp_handle, uint32_t attr_mask,
+ uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
union ibv_gid *dgid, uint32_t dqpn,
enum ibv_qp_state qp_state, uint32_t qkey,
uint32_t rq_psn, uint32_t sq_psn)
@@ -392,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
int ret;
pr_dbg("qpn=0x%x\n", qp_handle);
+ pr_dbg("qkey=0x%x\n", qkey);
qp = rdma_rm_get_qp(dev_res, qp_handle);
if (!qp) {
@@ -422,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
}
if (qp->qp_state == IBV_QPS_RTR) {
+ /* Get backend gid index */
+ pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
+ sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
+ sgid_idx);
+ if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
+ pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx);
+ return -EIO;
+ }
+
ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
- qp->qp_type, dgid, dqpn, rq_psn,
- qkey, attr_mask & IBV_QP_QKEY);
+ qp->qp_type, sgid_idx, dgid, dqpn,
+ rq_psn, qkey,
+ attr_mask & IBV_QP_QKEY);
if (ret) {
return -EIO;
}
@@ -515,11 +534,93 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}
+int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname, union ibv_gid *gid, int gid_idx)
+{
+ int rc;
+
+ rc = rdma_backend_add_gid(backend_dev, ifname, gid);
+ if (rc) {
+ pr_dbg("Fail to add gid\n");
+ return -EINVAL;
+ }
+
+ memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));
+
+ return 0;
+}
+
+int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname, int gid_idx)
+{
+ int rc;
+
+ if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
+ return 0;
+ }
+
+ rc = rdma_backend_del_gid(backend_dev, ifname,
+ &dev_res->port.gid_tbl[gid_idx].gid);
+ if (rc) {
+ pr_dbg("Fail to delete gid\n");
+ return -EINVAL;
+ }
+
+ memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
+ sizeof(dev_res->port.gid_tbl[gid_idx].gid));
+ dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;
+
+ return 0;
+}
+
+int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
+ RdmaBackendDev *backend_dev, int sgid_idx)
+{
+ if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
+ pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
+ return -EINVAL;
+ }
+
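+ /* The guest-to-backend GID index mapping is resolved lazily and cached in gid_tbl */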
+ if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
+ dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
+ rdma_backend_get_gid_index(backend_dev,
+ &dev_res->port.gid_tbl[sgid_idx].gid);
+ }
+
+ pr_dbg("backend_gid_index=%d\n",
+ dev_res->port.gid_tbl[sgid_idx].backend_gid_index);
+
+ return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
+}
+
static void destroy_qp_hash_key(gpointer data)
{
g_bytes_unref(data);
}
+static void init_ports(RdmaDeviceResources *dev_res)
+{
+ int i;
+
+ memset(&dev_res->port, 0, sizeof(dev_res->port));
+
+ dev_res->port.state = IBV_PORT_DOWN;
+ for (i = 0; i < MAX_PORT_GIDS; i++) {
+ dev_res->port.gid_tbl[i].backend_gid_index = -1;
+ }
+}
+
+static void fini_ports(RdmaDeviceResources *dev_res,
+ RdmaBackendDev *backend_dev, const char *ifname)
+{
+ int i;
+
+ dev_res->port.state = IBV_PORT_DOWN;
+ for (i = 0; i < MAX_PORT_GIDS; i++) {
+ rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
+ }
+}
+
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
Error **errp)
{
@@ -537,11 +638,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
dev_attr->max_qp_wr, sizeof(void *));
res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
+ init_ports(dev_res);
+
return 0;
}
-void rdma_rm_fini(RdmaDeviceResources *dev_res)
+void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname)
{
+ fini_ports(dev_res, backend_dev, ifname);
+
res_tbl_free(&dev_res->uc_tbl);
res_tbl_free(&dev_res->cqe_ctx_tbl);
res_tbl_free(&dev_res->qp_tbl);
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index b4e04cc7b4..3c602c04c0 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -22,7 +22,8 @@
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
Error **errp);
-void rdma_rm_fini(RdmaDeviceResources *dev_res);
+void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname);
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t *pd_handle, uint32_t ctx_handle);
@@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
- uint32_t qp_handle, uint32_t attr_mask,
+ uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
union ibv_gid *dgid, uint32_t dqpn,
enum ibv_qp_state qp_state, uint32_t qkey,
uint32_t rq_psn, uint32_t sq_psn);
@@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
+int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname, union ibv_gid *gid, int gid_idx);
+int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
+ const char *ifname, int gid_idx);
+int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
+ RdmaBackendDev *backend_dev, int sgid_idx);
+static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
+ int sgid_idx)
+{
+ return &dev_res->port.gid_tbl[sgid_idx].gid;
+}
+
#endif
diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 7228151239..0ba61d1838 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -18,8 +18,8 @@
#include "rdma_backend_defs.h"
-#define MAX_PORTS 1
-#define MAX_PORT_GIDS 1
+#define MAX_PORTS 1 /* Do not change - we support only one port */
+#define MAX_PORT_GIDS 255
#define MAX_GIDS MAX_PORT_GIDS
#define MAX_PORT_PKEYS 1
#define MAX_PKEYS MAX_PORT_PKEYS
@@ -49,10 +49,16 @@ typedef struct RdmaRmPD {
uint32_t ctx_handle;
} RdmaRmPD;
+typedef enum CQNotificationType {
+ CNT_CLEAR, /* no completion notification requested */
+ CNT_ARM, /* notify on the next completion */
+ CNT_SET, /* always notify (set for the GSI QP's CQs) */
+} CQNotificationType;
+
typedef struct RdmaRmCQ {
RdmaBackendCQ backend_cq;
void *opaque;
- bool notify;
+ CQNotificationType notify;
} RdmaRmCQ;
/* MR (DMA region) */
@@ -80,13 +86,18 @@ typedef struct RdmaRmQP {
enum ibv_qp_state qp_state;
} RdmaRmQP;
+typedef struct RdmaRmGid {
+ union ibv_gid gid;
+ int backend_gid_index;
+} RdmaRmGid;
+
typedef struct RdmaRmPort {
- union ibv_gid gid_tbl[MAX_PORT_GIDS];
+ RdmaRmGid gid_tbl[MAX_PORT_GIDS];
enum ibv_port_state state;
} RdmaRmPort;
typedef struct RdmaDeviceResources {
- RdmaRmPort ports[MAX_PORTS];
+ RdmaRmPort port;
RdmaRmResTbl pd_tbl;
RdmaRmResTbl mr_tbl;
RdmaRmResTbl uc_tbl;
diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c
index dc23f158f3..4fbea8cde2 100644
--- a/hw/rdma/rdma_utils.c
+++ b/hw/rdma/rdma_utils.c
@@ -13,6 +13,7 @@
*
*/
+#include "qemu/osdep.h"
#include "rdma_utils.h"
#ifdef PVRDMA_DEBUG
diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
index 04c7c2ef5b..4490ea0b94 100644
--- a/hw/rdma/rdma_utils.h
+++ b/hw/rdma/rdma_utils.h
@@ -17,9 +17,9 @@
#ifndef RDMA_UTILS_H
#define RDMA_UTILS_H
-#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "sysemu/dma.h"
+#include "stdio.h"
#define pr_info(fmt, ...) \
fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\
@@ -40,12 +40,36 @@ extern unsigned long pr_dbg_cnt;
#define pr_dbg(fmt, ...) \
fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \
__func__, __LINE__, ## __VA_ARGS__)
+
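+/* Hex-dump a buffer for debugging */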
+#define pr_dbg_buf(title, buf, len) \
+{ \
+ int i; \
+ char *b = g_malloc0(len * 3 + 1); \
+ char b1[4]; \
+ for (i = 0; i < len; i++) { \
+ sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \
+ strcat(b, b1); \
+ } \
+ pr_dbg("%s (%d): %s\n", title, len, b); \
+ g_free(b); \
+}
+
#else
#define init_pr_dbg(void)
#define pr_dbg(fmt, ...)
+#define pr_dbg_buf(title, buf, len)
#endif
void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
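+/*
+ * Derive a modified EUI-64 interface ID from a 6-byte MAC address:
+ * insert FF:FE in the middle and flip the universal/local bit
+ * (RFC 4291, Appendix A).
+ */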
+static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr)
+{
+ memcpy(eui, addr, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ memcpy(eui + 5, addr + 3, 3);
+ eui[0] ^= 2;
+}
+
#endif
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index e2d9f93cdf..ffae36986e 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -17,8 +17,11 @@
#define PVRDMA_PVRDMA_H
#include "qemu/units.h"
+#include "qemu/notify.h"
#include "hw/pci/pci.h"
#include "hw/pci/msix.h"
+#include "chardev/char-fe.h"
+#include "hw/net/vmxnet3_defs.h"
#include "../rdma_backend_defs.h"
#include "../rdma_rm_defs.h"
@@ -51,7 +54,7 @@
#define PVRDMA_FW_VERSION 14
/* Some defaults */
-#define PVRDMA_PKEY 0x7FFF
+#define PVRDMA_PKEY 0xFFFF
typedef struct DSRInfo {
dma_addr_t dma;
@@ -78,11 +81,14 @@ typedef struct PVRDMADev {
int interrupt_mask;
struct ibv_device_attr dev_attr;
uint64_t node_guid;
+ char *backend_eth_device_name;
char *backend_device_name;
- uint8_t backend_gid_idx;
uint8_t backend_port_num;
RdmaBackendDev backend_dev;
RdmaDeviceResources rdma_dev_res;
+ CharBackend mad_chr;
+ VMXNET3State *func0;
+ Notifier shutdown_notifier;
} PVRDMADev;
#define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 4faeb21631..89920887bf 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -128,6 +128,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
struct pvrdma_port_attr attrs = {0};
pr_dbg("port=%d\n", cmd->port_num);
+ if (cmd->port_num > MAX_PORTS) {
+ return -EINVAL;
+ }
if (rdma_backend_query_port(&dev->backend_dev,
(struct ibv_port_attr *)&attrs)) {
@@ -135,11 +138,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
}
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP;
- resp->hdr.err = 0;
- resp->attrs.state = attrs.state;
+ resp->attrs.state = dev->func0->device_active ? attrs.state :
+ PVRDMA_PORT_DOWN;
resp->attrs.max_mtu = attrs.max_mtu;
resp->attrs.active_mtu = attrs.active_mtu;
resp->attrs.phys_state = attrs.phys_state;
@@ -159,12 +160,16 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,
struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp;
pr_dbg("port=%d\n", cmd->port_num);
+ if (cmd->port_num > MAX_PORTS) {
+ return -EINVAL;
+ }
+
pr_dbg("index=%d\n", cmd->index);
+ if (cmd->index > MAX_PKEYS) {
+ return -EINVAL;
+ }
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP;
- resp->hdr.err = 0;
resp->pkey = PVRDMA_PKEY;
pr_dbg("pkey=0x%x\n", resp->pkey);
@@ -177,17 +182,15 @@ static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
{
struct pvrdma_cmd_create_pd *cmd = &req->create_pd;
struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp;
+ int rc;
pr_dbg("context=0x%x\n", cmd->ctx_handle ? cmd->ctx_handle : 0);
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_CREATE_PD_RESP;
- resp->hdr.err = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
- &resp->pd_handle, cmd->ctx_handle);
+ rc = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
+ &resp->pd_handle, cmd->ctx_handle);
- pr_dbg("ret=%d\n", resp->hdr.err);
- return resp->hdr.err;
+ return rc;
}
static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -209,10 +212,9 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
struct pvrdma_cmd_create_mr_resp *resp = &rsp->create_mr_resp;
PCIDevice *pci_dev = PCI_DEVICE(dev);
void *host_virt = NULL;
+ int rc = 0;
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_CREATE_MR_RESP;
pr_dbg("pd_handle=%d\n", cmd->pd_handle);
pr_dbg("access_flags=0x%x\n", cmd->access_flags);
@@ -223,22 +225,18 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
cmd->length);
if (!host_virt) {
pr_dbg("Failed to map to pdir\n");
- resp->hdr.err = -EINVAL;
- goto out;
+ return -EINVAL;
}
}
- resp->hdr.err = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle,
- cmd->start, cmd->length, host_virt,
- cmd->access_flags, &resp->mr_handle,
- &resp->lkey, &resp->rkey);
- if (host_virt && !resp->hdr.err) {
+ rc = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle, cmd->start,
+ cmd->length, host_virt, cmd->access_flags,
+ &resp->mr_handle, &resp->lkey, &resp->rkey);
+ if (rc && host_virt) {
munmap(host_virt, cmd->length);
}
-out:
- pr_dbg("ret=%d\n", resp->hdr.err);
- return resp->hdr.err;
+ return rc;
}
static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -261,6 +259,11 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring,
int rc = -EINVAL;
char ring_name[MAX_RING_NAME_SZ];
+ if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+ pr_dbg("invalid nchunks: %d\n", nchunks);
+ return rc;
+ }
+
pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
if (!dir) {
@@ -310,34 +313,43 @@ out:
return rc;
}
+static void destroy_cq_ring(PvrdmaRing *ring)
+{
+ pvrdma_ring_free(ring);
+ /* ring_state was in slot 1, not 0 so need to jump back */
+ rdma_pci_dma_unmap(ring->dev, --ring->ring_state, TARGET_PAGE_SIZE);
+ g_free(ring);
+}
+
static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_create_cq *cmd = &req->create_cq;
struct pvrdma_cmd_create_cq_resp *resp = &rsp->create_cq_resp;
PvrdmaRing *ring = NULL;
+ int rc;
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_CREATE_CQ_RESP;
resp->cqe = cmd->cqe;
- resp->hdr.err = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
- cmd->nchunks, cmd->cqe);
- if (resp->hdr.err) {
- goto out;
+ rc = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, cmd->nchunks,
+ cmd->cqe);
+ if (rc) {
+ return rc;
}
pr_dbg("ring=%p\n", ring);
- resp->hdr.err = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev,
- cmd->cqe, &resp->cq_handle, ring);
+ rc = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, cmd->cqe,
+ &resp->cq_handle, ring);
+ if (rc) {
+ destroy_cq_ring(ring);
+ }
+
resp->cqe = cmd->cqe;
-out:
- pr_dbg("ret=%d\n", resp->hdr.err);
- return resp->hdr.err;
+ return rc;
}
static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -356,10 +368,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
}
ring = (PvrdmaRing *)cq->opaque;
- pvrdma_ring_free(ring);
- /* ring_state was in slot 1, not 0 so need to jump back */
- rdma_pci_dma_unmap(PCI_DEVICE(dev), --ring->ring_state, TARGET_PAGE_SIZE);
- g_free(ring);
+ destroy_cq_ring(ring);
rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle);
@@ -377,6 +386,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
char ring_name[MAX_RING_NAME_SZ];
uint32_t wqe_sz;
+ if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
+ || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
+ pr_dbg("invalid pages: %d, %d\n", spages, rpages);
+ return rc;
+ }
+
pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
if (!dir) {
@@ -451,36 +466,49 @@ out:
return rc;
}
+static void destroy_qp_rings(PvrdmaRing *ring)
+{
+ pr_dbg("sring=%p\n", &ring[0]);
+ pvrdma_ring_free(&ring[0]);
+ pr_dbg("rring=%p\n", &ring[1]);
+ pvrdma_ring_free(&ring[1]);
+
+ rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+ g_free(ring);
+}
+
static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_create_qp *cmd = &req->create_qp;
struct pvrdma_cmd_create_qp_resp *resp = &rsp->create_qp_resp;
PvrdmaRing *rings = NULL;
+ int rc;
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_CREATE_QP_RESP;
pr_dbg("total_chunks=%d\n", cmd->total_chunks);
pr_dbg("send_chunks=%d\n", cmd->send_chunks);
- resp->hdr.err = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
- cmd->max_send_wr, cmd->max_send_sge,
- cmd->send_chunks, cmd->max_recv_wr,
- cmd->max_recv_sge, cmd->total_chunks -
- cmd->send_chunks - 1);
- if (resp->hdr.err) {
- goto out;
+ rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
+ cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
+ cmd->max_recv_wr, cmd->max_recv_sge,
+ cmd->total_chunks - cmd->send_chunks - 1);
+ if (rc) {
+ return rc;
}
pr_dbg("rings=%p\n", rings);
- resp->hdr.err = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle,
- cmd->qp_type, cmd->max_send_wr,
- cmd->max_send_sge, cmd->send_cq_handle,
- cmd->max_recv_wr, cmd->max_recv_sge,
- cmd->recv_cq_handle, rings, &resp->qpn);
+ rc = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, cmd->qp_type,
+ cmd->max_send_wr, cmd->max_send_sge,
+ cmd->send_cq_handle, cmd->max_recv_wr,
+ cmd->max_recv_sge, cmd->recv_cq_handle, rings,
+ &resp->qpn);
+ if (rc) {
+ destroy_qp_rings(rings);
+ return rc;
+ }
resp->max_send_wr = cmd->max_send_wr;
resp->max_recv_wr = cmd->max_recv_wr;
@@ -488,32 +516,31 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
resp->max_recv_sge = cmd->max_recv_sge;
resp->max_inline_data = cmd->max_inline_data;
-out:
- pr_dbg("ret=%d\n", resp->hdr.err);
- return resp->hdr.err;
+ return 0;
}
static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp;
+ int rc;
pr_dbg("qp_handle=%d\n", cmd->qp_handle);
memset(rsp, 0, sizeof(*rsp));
- rsp->hdr.response = cmd->hdr.response;
- rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP;
-
- rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
- cmd->qp_handle, cmd->attr_mask,
- (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
- cmd->attrs.dest_qp_num,
- (enum ibv_qp_state)cmd->attrs.qp_state,
- cmd->attrs.qkey, cmd->attrs.rq_psn,
- cmd->attrs.sq_psn);
-
- pr_dbg("ret=%d\n", rsp->hdr.err);
- return rsp->hdr.err;
+
+ /* No need to verify sgid_index since it is a u8 */
+
+ rc = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
+ cmd->qp_handle, cmd->attr_mask,
+ cmd->attrs.ah_attr.grh.sgid_index,
+ (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
+ cmd->attrs.dest_qp_num,
+ (enum ibv_qp_state)cmd->attrs.qp_state,
+ cmd->attrs.qkey, cmd->attrs.rq_psn,
+ cmd->attrs.sq_psn);
+
+ return rc;
}
static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -522,21 +549,18 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
struct pvrdma_cmd_query_qp *cmd = &req->query_qp;
struct pvrdma_cmd_query_qp_resp *resp = &rsp->query_qp_resp;
struct ibv_qp_init_attr init_attr;
+ int rc;
pr_dbg("qp_handle=%d\n", cmd->qp_handle);
pr_dbg("attr_mask=0x%x\n", cmd->attr_mask);
memset(rsp, 0, sizeof(*rsp));
- rsp->hdr.response = cmd->hdr.response;
- rsp->hdr.ack = PVRDMA_CMD_QUERY_QP_RESP;
- rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev,
- cmd->qp_handle,
- (struct ibv_qp_attr *)&resp->attrs,
- cmd->attr_mask, &init_attr);
+ rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle,
+ (struct ibv_qp_attr *)&resp->attrs, cmd->attr_mask,
+ &init_attr);
- pr_dbg("ret=%d\n", rsp->hdr.err);
- return rsp->hdr.err;
+ return rc;
}
static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -555,13 +579,7 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
ring = (PvrdmaRing *)qp->opaque;
- pr_dbg("sring=%p\n", &ring[0]);
- pvrdma_ring_free(&ring[0]);
- pr_dbg("rring=%p\n", &ring[1]);
- pvrdma_ring_free(&ring[1]);
-
- rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE);
- g_free(ring);
+ destroy_qp_rings(ring);
return 0;
}
@@ -570,10 +588,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_create_bind *cmd = &req->create_bind;
-#ifdef PVRDMA_DEBUG
- __be64 *subnet = (__be64 *)&cmd->new_gid[0];
- __be64 *if_id = (__be64 *)&cmd->new_gid[8];
-#endif
+ int rc;
+ union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid;
pr_dbg("index=%d\n", cmd->index);
@@ -582,26 +598,20 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
}
pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,
- (long long unsigned int)be64_to_cpu(*subnet),
- (long long unsigned int)be64_to_cpu(*if_id));
-
- /* Driver forces to one port only */
- memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid,
- sizeof(cmd->new_gid));
+ (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(gid->global.interface_id));
- /* TODO: Since drivers stores node_guid at load_dsr phase then this
- * assignment is not relevant, i need to figure out a way how to
- * retrieve MAC of our netdev */
- dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id;
- pr_dbg("dev->node_guid=0x%llx\n",
- (long long unsigned int)be64_to_cpu(dev->node_guid));
+ rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev,
+ dev->backend_eth_device_name, gid, cmd->index);
- return 0;
+ return rc;
}
static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
+ int rc;
+
struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind;
pr_dbg("index=%d\n", cmd->index);
@@ -610,10 +620,10 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
return -EINVAL;
}
- memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0,
- sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw));
+ rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev,
+ dev->backend_eth_device_name, cmd->index);
- return 0;
+ return rc;
}
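create_bind() above now treats the guest-supplied 16-byte new_gid as a union ibv_gid and hands it to rdma_rm_add_gid(), which also propagates it to the backend device named by backend_eth_device_name; destroy_bind() mirrors this with rdma_rm_del_gid() instead of merely zeroing the local table entry. For reference, a sketch of the libibverbs layout the cast relies on (exact typedef spellings vary between rdma-core releases):

    union ibv_gid {                  /* <infiniband/verbs.h>, sketch */
        uint8_t raw[16];
        struct {
            __be64 subnet_prefix;    /* upper 8 bytes, network byte order */
            __be64 interface_id;     /* lower 8 bytes, network byte order */
        } global;
    };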
static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -621,18 +631,14 @@ static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
{
struct pvrdma_cmd_create_uc *cmd = &req->create_uc;
struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp;
+ int rc;
pr_dbg("pfn=%d\n", cmd->pfn);
memset(resp, 0, sizeof(*resp));
- resp->hdr.response = cmd->hdr.response;
- resp->hdr.ack = PVRDMA_CMD_CREATE_UC_RESP;
- resp->hdr.err = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn,
- &resp->ctx_handle);
-
- pr_dbg("ret=%d\n", resp->hdr.err);
+ rc = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, &resp->ctx_handle);
- return 0;
+ return rc;
}
static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -646,30 +652,32 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
return 0;
}
+
struct cmd_handler {
uint32_t cmd;
+ uint32_t ack;
int (*exec)(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp);
};
static struct cmd_handler cmd_handlers[] = {
- {PVRDMA_CMD_QUERY_PORT, query_port},
- {PVRDMA_CMD_QUERY_PKEY, query_pkey},
- {PVRDMA_CMD_CREATE_PD, create_pd},
- {PVRDMA_CMD_DESTROY_PD, destroy_pd},
- {PVRDMA_CMD_CREATE_MR, create_mr},
- {PVRDMA_CMD_DESTROY_MR, destroy_mr},
- {PVRDMA_CMD_CREATE_CQ, create_cq},
- {PVRDMA_CMD_RESIZE_CQ, NULL},
- {PVRDMA_CMD_DESTROY_CQ, destroy_cq},
- {PVRDMA_CMD_CREATE_QP, create_qp},
- {PVRDMA_CMD_MODIFY_QP, modify_qp},
- {PVRDMA_CMD_QUERY_QP, query_qp},
- {PVRDMA_CMD_DESTROY_QP, destroy_qp},
- {PVRDMA_CMD_CREATE_UC, create_uc},
- {PVRDMA_CMD_DESTROY_UC, destroy_uc},
- {PVRDMA_CMD_CREATE_BIND, create_bind},
- {PVRDMA_CMD_DESTROY_BIND, destroy_bind},
+ {PVRDMA_CMD_QUERY_PORT, PVRDMA_CMD_QUERY_PORT_RESP, query_port},
+ {PVRDMA_CMD_QUERY_PKEY, PVRDMA_CMD_QUERY_PKEY_RESP, query_pkey},
+ {PVRDMA_CMD_CREATE_PD, PVRDMA_CMD_CREATE_PD_RESP, create_pd},
+ {PVRDMA_CMD_DESTROY_PD, PVRDMA_CMD_DESTROY_PD_RESP_NOOP, destroy_pd},
+ {PVRDMA_CMD_CREATE_MR, PVRDMA_CMD_CREATE_MR_RESP, create_mr},
+ {PVRDMA_CMD_DESTROY_MR, PVRDMA_CMD_DESTROY_MR_RESP_NOOP, destroy_mr},
+ {PVRDMA_CMD_CREATE_CQ, PVRDMA_CMD_CREATE_CQ_RESP, create_cq},
+ {PVRDMA_CMD_RESIZE_CQ, PVRDMA_CMD_RESIZE_CQ_RESP, NULL},
+ {PVRDMA_CMD_DESTROY_CQ, PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, destroy_cq},
+ {PVRDMA_CMD_CREATE_QP, PVRDMA_CMD_CREATE_QP_RESP, create_qp},
+ {PVRDMA_CMD_MODIFY_QP, PVRDMA_CMD_MODIFY_QP_RESP, modify_qp},
+ {PVRDMA_CMD_QUERY_QP, PVRDMA_CMD_QUERY_QP_RESP, query_qp},
+ {PVRDMA_CMD_DESTROY_QP, PVRDMA_CMD_DESTROY_QP_RESP, destroy_qp},
+ {PVRDMA_CMD_CREATE_UC, PVRDMA_CMD_CREATE_UC_RESP, create_uc},
+ {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc},
+ {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind},
+ {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
};
int execute_command(PVRDMADev *dev)
@@ -692,7 +700,12 @@ int execute_command(PVRDMADev *dev)
}
err = cmd_handlers[dsr_info->req->hdr.cmd].exec(dev, dsr_info->req,
- dsr_info->rsp);
+ dsr_info->rsp);
+ dsr_info->rsp->hdr.response = dsr_info->req->hdr.response;
+ dsr_info->rsp->hdr.ack = cmd_handlers[dsr_info->req->hdr.cmd].ack;
+ dsr_info->rsp->hdr.err = err < 0 ? -err : 0;
+ pr_dbg("rsp->hdr.err=%d\n", dsr_info->rsp->hdr.err);
+
out:
set_reg_val(dev, PVRDMA_REG_ERR, err);
post_interrupt(dev, INTR_VEC_CMD_RING);
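With the ack value stored in cmd_handlers[] and the response header filled in exactly one place, a handler only has to return 0 on success or a negative errno; execute_command() echoes the request's response tag, takes the ack code from the table, and stores the negated return value in rsp->hdr.err. A hypothetical new handler written against this contract (all "foo" names are illustrative, not from the patch):

    static int create_foo(PVRDMADev *dev, union pvrdma_cmd_req *req,
                          union pvrdma_cmd_resp *rsp)
    {
        struct pvrdma_cmd_create_foo *cmd = &req->create_foo;            /* hypothetical */
        struct pvrdma_cmd_create_foo_resp *resp = &rsp->create_foo_resp; /* hypothetical */

        memset(resp, 0, sizeof(*resp));
        if (cmd->some_guest_value > SOME_LIMIT) {   /* hypothetical validation */
            return -EINVAL;       /* surfaces to the guest as rsp->hdr.err == EINVAL */
        }
        return 0;                 /* header bookkeeping happens in execute_command() */
    }

plus a matching {PVRDMA_CMD_CREATE_FOO, PVRDMA_CMD_CREATE_FOO_RESP, create_foo} entry in cmd_handlers[].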
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c
index 01247fc041..e8e5b502f6 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.c
+++ b/hw/rdma/vmw/pvrdma_dev_ring.c
@@ -73,23 +73,16 @@ out:
void *pvrdma_ring_next_elem_read(PvrdmaRing *ring)
{
+ int e;
unsigned int idx = 0, offset;
- /*
- pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
- ring->ring_state->cons_head);
- */
-
- if (!pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx)) {
+ e = pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx);
+ if (e <= 0) {
pr_dbg("No more data in ring\n");
return NULL;
}
offset = idx * ring->elem_sz;
- /*
- pr_dbg("idx=%d\n", idx);
- pr_dbg("offset=%d\n", offset);
- */
return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
}
@@ -105,20 +98,20 @@ void pvrdma_ring_read_inc(PvrdmaRing *ring)
void *pvrdma_ring_next_elem_write(PvrdmaRing *ring)
{
- unsigned int idx, offset, tail;
+ int idx;
+ unsigned int offset, tail;
- /*
- pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
- ring->ring_state->cons_head);
- */
-
- if (!pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail)) {
+ idx = pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail);
+ if (idx <= 0) {
pr_dbg("CQ is full\n");
return NULL;
}
idx = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems);
- /* TODO: tail == idx */
+ if (idx < 0 || tail != idx) {
+ pr_dbg("invalid idx\n");
+ return NULL;
+ }
offset = idx * ring->elem_sz;
return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
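Both helpers now treat the signed return value of the index-ring queries as authoritative: anything <= 0 means "nothing to read" or "no room to write", and the write side additionally cross-checks the tail reported by pvrdma_idx_ring_has_space() against pvrdma_idx() of the producer tail before handing out an element. Callers consume the read side with a simple drain loop; roughly (a sketch, process() is an illustrative consumer):

    void *e;

    while ((e = pvrdma_ring_next_elem_read(ring)) != NULL) {
        process(e);                  /* hypothetical per-element work */
        pvrdma_ring_read_inc(ring);  /* advance the consumer head */
    }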
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.h b/hw/rdma/vmw/pvrdma_dev_ring.h
index 411d244603..5f2a0cf9b9 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.h
+++ b/hw/rdma/vmw/pvrdma_dev_ring.h
@@ -16,7 +16,6 @@
#ifndef PVRDMA_DEV_RING_H
#define PVRDMA_DEV_RING_H
-#include "qemu/typedefs.h"
#define MAX_RING_NAME_SZ 32
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index ca5fa8d981..838ad8a949 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -24,6 +24,7 @@
#include "hw/qdev-properties.h"
#include "cpu.h"
#include "trace.h"
+#include "sysemu/sysemu.h"
#include "../rdma_rm.h"
#include "../rdma_backend.h"
@@ -36,9 +37,9 @@
#include "pvrdma_qp_ops.h"
static Property pvrdma_dev_properties[] = {
- DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name),
- DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1),
- DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0),
+ DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
+ DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
+ DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
MAX_MR_SIZE),
DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
@@ -51,6 +52,7 @@ static Property pvrdma_dev_properties[] = {
DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+ DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
DEFINE_PROP_END_OF_LIST(),
};
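The backend-gid-idx property is dropped (GID entries are now managed individually through the bind commands), backend-dev/backend-port are renamed to ibdev/ibport, and two properties are added: netdev, the Ethernet device name passed to rdma_rm_add_gid()/rdma_rm_del_gid() when GID entries are programmed, and mad-chardev, a character device used for MAD traffic. Assuming the device type is still named "pvrdma" and using only the properties listed here, an invocation might look like (illustrative values; the chardev and the vmxnet3 function-0 neighbour required by the realize() change below are set up separately):

    -device pvrdma,netdev=ens3,ibdev=mlx5_0,ibport=1,mad-chardev=madchr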
@@ -263,7 +265,7 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
dsr->caps.sys_image_guid = 0;
pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid);
- dsr->caps.node_guid = cpu_to_be64(dev->node_guid);
+ dsr->caps.node_guid = dev->node_guid;
pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid));
dsr->caps.phys_port_cnt = MAX_PORTS;
@@ -275,17 +277,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
pr_dbg("Initialized\n");
}
-static void init_ports(PVRDMADev *dev, Error **errp)
-{
- int i;
-
- memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports));
-
- for (i = 0; i < MAX_PORTS; i++) {
- dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN;
- }
-}
-
static void uninit_msix(PCIDevice *pdev, int used_vectors)
{
PVRDMADev *dev = PVRDMA_DEV(pdev);
@@ -334,7 +325,8 @@ static void pvrdma_fini(PCIDevice *pdev)
pvrdma_qp_ops_fini();
- rdma_rm_fini(&dev->rdma_dev_res);
+ rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
+ dev->backend_eth_device_name);
rdma_backend_fini(&dev->backend_dev);
@@ -343,6 +335,9 @@ static void pvrdma_fini(PCIDevice *pdev)
if (msix_enabled(pdev)) {
uninit_msix(pdev, RDMA_MAX_INTRS);
}
+
+ pr_dbg("Device %s %x.%x is down\n", pdev->name, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
}
static void pvrdma_stop(PVRDMADev *dev)
@@ -368,13 +363,11 @@ static int unquiesce_device(PVRDMADev *dev)
return 0;
}
-static int reset_device(PVRDMADev *dev)
+static void reset_device(PVRDMADev *dev)
{
pvrdma_stop(dev);
pr_dbg("Device reset complete\n");
-
- return 0;
}
static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size)
@@ -455,6 +448,11 @@ static const MemoryRegionOps regs_ops = {
},
};
+static uint64_t uar_read(void *opaque, hwaddr addr, unsigned size)
+{
+ return 0xffffffff;
+}
+
static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
{
PVRDMADev *dev = opaque;
@@ -496,6 +494,7 @@ static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
}
static const MemoryRegionOps uar_ops = {
+ .read = uar_read,
.write = uar_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
@@ -570,12 +569,21 @@ static int pvrdma_check_ram_shared(Object *obj, void *opaque)
return 0;
}
+static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
+{
+ PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
+ PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+ pvrdma_fini(pci_dev);
+}
+
static void pvrdma_realize(PCIDevice *pdev, Error **errp)
{
- int rc;
+ int rc = 0;
PVRDMADev *dev = PVRDMA_DEV(pdev);
Object *memdev_root;
bool ram_shared = false;
+ PCIDevice *func0;
init_pr_dbg();
@@ -587,6 +595,20 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
return;
}
+ func0 = pci_get_function_0(pdev);
+ /* Fail realize unless function 0 of this slot is a vmxnet3 device */
+ if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) {
+ pr_dbg("func0 type is %s\n",
+ object_get_typename(&func0->qdev.parent_obj));
+ error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn),
+ TYPE_VMXNET3);
+ return;
+ }
+ dev->func0 = VMXNET3(func0);
+
+ addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+ (const char *)&dev->func0->conf.macaddr.a);
+
memdev_root = object_resolve_path("/objects", NULL);
if (memdev_root) {
object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared);
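The node GUID is no longer recovered from the GID table at bind time; realize() derives it once from the MAC address of the paired vmxnet3 function via addrconf_addr_eui48() (which also explains why the init_dsr_dev_caps() hunk above stops byte-swapping node_guid). Assuming that helper mirrors the kernel routine of the same name, the derivation is the usual MAC-to-EUI-64 expansion:

    #include <stdint.h>
    #include <string.h>

    /* Sketch of the assumed EUI-48 -> EUI-64 expansion */
    static void eui48_to_eui64(uint8_t eui[8], const uint8_t mac[6])
    {
        memcpy(eui, mac, 3);            /* OUI */
        eui[3] = 0xFF;                  /* fixed filler bytes */
        eui[4] = 0xFE;
        memcpy(eui + 5, mac + 3, 3);    /* NIC-specific part */
        eui[0] ^= 0x02;                 /* toggle the universal/local bit */
    }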
@@ -613,7 +635,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
dev->backend_device_name, dev->backend_port_num,
- dev->backend_gid_idx, &dev->dev_attr, errp);
+ &dev->dev_attr, &dev->mad_chr, errp);
if (rc) {
goto out;
}
@@ -623,15 +645,17 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
goto out;
}
- init_ports(dev, errp);
-
rc = pvrdma_qp_ops_init();
if (rc) {
goto out;
}
+ dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
+ qemu_register_shutdown_notifier(&dev->shutdown_notifier);
+
out:
if (rc) {
+ pvrdma_fini(pdev);
error_append_hint(errp, "Device fail to load\n");
}
}
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index c668afd0ed..300471a4c9 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -47,7 +47,7 @@ typedef struct PvrdmaRqWqe {
* 3. Interrupt host
*/
static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
- struct pvrdma_cqe *cqe)
+ struct pvrdma_cqe *cqe, struct ibv_wc *wc)
{
struct pvrdma_cqe *cqe1;
struct pvrdma_cqne *cqne;
@@ -66,6 +66,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
pr_dbg("Writing CQE\n");
cqe1 = pvrdma_ring_next_elem_write(ring);
if (unlikely(!cqe1)) {
+ pr_dbg("No CQEs in ring\n");
return -EINVAL;
}
@@ -73,8 +74,20 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
cqe1->wr_id = cqe->wr_id;
cqe1->qp = cqe->qp;
cqe1->opcode = cqe->opcode;
- cqe1->status = cqe->status;
- cqe1->vendor_err = cqe->vendor_err;
+ cqe1->status = wc->status;
+ cqe1->byte_len = wc->byte_len;
+ cqe1->src_qp = wc->src_qp;
+ cqe1->wc_flags = wc->wc_flags;
+ cqe1->vendor_err = wc->vendor_err;
+
+ pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id);
+ pr_dbg("qp=0x%lx\n", cqe1->qp);
+ pr_dbg("opcode=%d\n", cqe1->opcode);
+ pr_dbg("status=%d\n", cqe1->status);
+ pr_dbg("byte_len=%d\n", cqe1->byte_len);
+ pr_dbg("src_qp=%d\n", cqe1->src_qp);
+ pr_dbg("wc_flags=%d\n", cqe1->wc_flags);
+ pr_dbg("vendor_err=%d\n", cqe1->vendor_err);
pvrdma_ring_write_inc(ring);
@@ -89,26 +102,22 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
pvrdma_ring_write_inc(&dev->dsr_info.cq);
pr_dbg("cq->notify=%d\n", cq->notify);
- if (cq->notify) {
- cq->notify = false;
+ if (cq->notify != CNT_CLEAR) {
+ if (cq->notify == CNT_ARM) {
+ cq->notify = CNT_CLEAR;
+ }
post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
}
return 0;
}
-static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
- void *ctx)
+static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
{
CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
- pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
- pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id);
- pr_dbg("status=%d\n", status);
- pr_dbg("vendor_err=0x%x\n", vendor_err);
- comp_ctx->cqe.status = status;
- comp_ctx->cqe.vendor_err = vendor_err;
- pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
+ pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);
+
g_free(ctx);
}
@@ -129,6 +138,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
RdmaRmQP *qp;
PvrdmaSqWqe *wqe;
PvrdmaRing *ring;
+ int sgid_idx;
+ union ibv_gid *sgid;
pr_dbg("qp_handle=0x%x\n", qp_handle);
@@ -152,10 +163,28 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
comp_ctx->cq_handle = qp->send_cq_handle;
comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
comp_ctx->cqe.qp = qp_handle;
- comp_ctx->cqe.opcode = wqe->hdr.opcode;
+ comp_ctx->cqe.opcode = IBV_WC_SEND;
+
+ sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
+ if (!sgid) {
+ pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index);
+ return -EIO;
+ }
+ pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index,
+ sgid->global.interface_id);
+
+ sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
+ &dev->backend_dev,
+ wqe->hdr.wr.ud.av.gid_index);
+ if (sgid_idx <= 0) {
+ pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n",
+ wqe->hdr.wr.ud.av.gid_index);
+ return -EIO;
+ }
rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
(struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
+ sgid_idx, sgid,
(union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
wqe->hdr.wr.ud.remote_qpn,
wqe->hdr.wr.ud.remote_qkey, comp_ctx);
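pvrdma_qp_send() now resolves the source GID in two steps before posting: the WQE's av.gid_index selects an entry in the emulated GID table, and that GID is then mapped to the index the host device actually uses; both are passed to rdma_backend_post_send(). The completion opcode is also forced to IBV_WC_SEND rather than echoing the guest's WQE opcode.

    /*
     * Two-level source-GID resolution (sketch of the flow above):
     *
     *   guest av.gid_index --(emulated GID table)--> union ibv_gid *sgid
     *   sgid               --(backend/host table)--> sgid_idx for libibverbs
     *
     * Either lookup failing aborts the send with -EIO.
     */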
@@ -194,8 +223,9 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
comp_ctx = g_malloc(sizeof(CompHandlerCtx));
comp_ctx->dev = dev;
comp_ctx->cq_handle = qp->recv_cq_handle;
- comp_ctx->cqe.qp = qp_handle;
comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+ comp_ctx->cqe.qp = qp_handle;
+ comp_ctx->cqe.opcode = IBV_WC_RECV;
rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
&qp->backend_qp, qp->qp_type,
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 04ec5cc970..f92b046cd3 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1290,9 +1290,19 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
{
int i;
-
- copy_pmcw_to_guest(&dest->pmcw, &src->pmcw);
- copy_scsw_to_guest(&dest->scsw, &src->scsw);
+ /*
+ * We copy the PMCW and SCSW in and out of local variables to
+ * avoid taking the address of members of a packed struct.
+ */
+ PMCW src_pmcw, dest_pmcw;
+ SCSW src_scsw, dest_scsw;
+
+ src_pmcw = src->pmcw;
+ copy_pmcw_to_guest(&dest_pmcw, &src_pmcw);
+ dest->pmcw = dest_pmcw;
+ src_scsw = src->scsw;
+ copy_scsw_to_guest(&dest_scsw, &src_scsw);
+ dest->scsw = dest_scsw;
dest->mba = cpu_to_be64(src->mba);
for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
dest->mda[i] = src->mda[i];
@@ -1339,9 +1349,19 @@ static void copy_scsw_from_guest(SCSW *dest, const SCSW *src)
static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src)
{
int i;
-
- copy_pmcw_from_guest(&dest->pmcw, &src->pmcw);
- copy_scsw_from_guest(&dest->scsw, &src->scsw);
+ /*
+ * We copy the PMCW and SCSW in and out of local variables to
+ * avoid taking the address of members of a packed struct.
+ */
+ PMCW src_pmcw, dest_pmcw;
+ SCSW src_scsw, dest_scsw;
+
+ src_pmcw = src->pmcw;
+ copy_pmcw_from_guest(&dest_pmcw, &src_pmcw);
+ dest->pmcw = dest_pmcw;
+ src_scsw = src->scsw;
+ copy_scsw_from_guest(&dest_scsw, &src_scsw);
+ dest->scsw = dest_scsw;
dest->mba = be64_to_cpu(src->mba);
for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
dest->mda[i] = src->mda[i];
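Both copy_schib_to_guest() and copy_schib_from_guest() now go through aligned stack copies because SCHIB is a packed structure: taking the address of its embedded PMCW/SCSW members yields potentially misaligned pointers, which recent compilers flag with -Waddress-of-packed-member. The same pattern in isolation (a sketch, not code from this patch; consume() is illustrative):

    #include <stdint.h>

    struct inner { uint32_t word; };
    struct outer { uint8_t tag; struct inner in; } __attribute__((packed));

    void consume(struct inner *p);   /* wants a properly aligned pointer */

    void use(struct outer *o)
    {
        struct inner tmp = o->in;    /* aligned copy; safe to take its address */
        consume(&tmp);               /* instead of consume(&o->in) */
        o->in = tmp;                 /* write back any changes */
    }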
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index f7458445c0..15759b6514 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -823,8 +823,8 @@ static bool s390_pci_alloc_idx(S390pciState *s, S390PCIBusDevice *pbdev)
return true;
}
-static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
{
PCIDevice *pdev = NULL;
S390PCIBusDevice *pbdev = NULL;
@@ -932,8 +932,8 @@ static void s390_pcihost_timer_cb(void *opaque)
qdev_unplug(DEVICE(pbdev), NULL);
}
-static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
- DeviceState *dev, Error **errp)
+static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
{
PCIDevice *pci_dev = NULL;
PCIBus *bus;
@@ -1041,8 +1041,8 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data)
dc->reset = s390_pcihost_reset;
dc->realize = s390_pcihost_realize;
- hc->plug = s390_pcihost_hot_plug;
- hc->unplug = s390_pcihost_hot_unplug;
+ hc->plug = s390_pcihost_plug;
+ hc->unplug = s390_pcihost_unplug;
msi_nonbroken = true;
}
diff --git a/hw/smbios/smbios-stub.c b/hw/smbios/smbios-stub.c
index d3a385441a..64e5ba93ec 100644
--- a/hw/smbios/smbios-stub.c
+++ b/hw/smbios/smbios-stub.c
@@ -23,7 +23,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
void smbios_entry_add(QemuOpts *opts, Error **errp)
{
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 04811279a0..818be8a838 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -24,11 +24,10 @@
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "sysemu/cpus.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "hw/loader.h"
#include "exec/cpu-common.h"
#include "smbios_build.h"
-#include "hw/smbios/ipmi.h"
/* legacy structures and constants for <= 2.0 machines */
struct smbios_header {
diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h
index 93b360d520..56b5a1e3f3 100644
--- a/hw/smbios/smbios_build.h
+++ b/hw/smbios/smbios_build.h
@@ -3,6 +3,7 @@
*
* Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2013 Red Hat, Inc.
+ * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC
*
* Authors:
* Alex Williamson <alex.williamson@hp.com>
@@ -96,4 +97,7 @@ extern unsigned smbios_table_cnt;
smbios_table_cnt++; \
} while (0)
+/* IPMI SMBIOS firmware handling */
+void smbios_build_type_38_table(void);
+
#endif /* QEMU_SMBIOS_BUILD_H */
diff --git a/hw/smbios/smbios_type_38-stub.c b/hw/smbios/smbios_type_38-stub.c
index 5b83c9b1f1..14b53d004b 100644
--- a/hw/smbios/smbios_type_38-stub.c
+++ b/hw/smbios/smbios_type_38-stub.c
@@ -8,7 +8,7 @@
*/
#include "qemu/osdep.h"
-#include "hw/smbios/ipmi.h"
+#include "smbios_build.h"
void smbios_build_type_38_table(void)
{
diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c
index 56e8609c00..0c08f282de 100644
--- a/hw/smbios/smbios_type_38.c
+++ b/hw/smbios/smbios_type_38.c
@@ -9,8 +9,7 @@
#include "qemu/osdep.h"
#include "hw/ipmi/ipmi.h"
-#include "hw/smbios/ipmi.h"
-#include "hw/smbios/smbios.h"
+#include "hw/firmware/smbios.h"
#include "qemu/error-report.h"
#include "smbios_build.h"
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 0a25f5e096..6166ccd47a 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -10,9 +10,9 @@
* directory.
*/
+#include "qemu/osdep.h"
#include <linux/vfio.h>
#include <sys/ioctl.h>
-#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/vfio/vfio.h"
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5c7bd96984..c0cb1ec289 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1897,15 +1897,10 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
PCI_EXP_TYPE_ENDPOINT << 4,
PCI_EXP_FLAGS_TYPE);
vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP,
- PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0);
+ QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
}
-
- /* Mark the Link Status bits as emulated to allow virtual negotiation */
- vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA,
- pci_get_word(vdev->pdev.config + pos +
- PCI_EXP_LNKSTA),
- PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
}
/*
diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
index bf64996e48..8cc3fa3ef7 100644
--- a/hw/virtio/virtio-crypto-pci.c
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj)
TYPE_VIRTIO_CRYPTO);
}
-static const TypeInfo virtio_crypto_pci_info = {
- .name = TYPE_VIRTIO_CRYPTO_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = {
+ .generic_name = TYPE_VIRTIO_CRYPTO_PCI,
.instance_size = sizeof(VirtIOCryptoPCI),
.instance_init = virtio_crypto_initfn,
.class_init = virtio_crypto_pci_class_init,
@@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = {
static void virtio_crypto_pci_register_types(void)
{
- type_register_static(&virtio_crypto_pci_info);
+ virtio_pci_types_register(&virtio_crypto_pci_info);
}
type_init(virtio_crypto_pci_register_types)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index a954799267..d05066deb8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1119,9 +1119,11 @@ static void virtio_9p_pci_instance_init(Object *obj)
TYPE_VIRTIO_9P);
}
-static const TypeInfo virtio_9p_pci_info = {
- .name = TYPE_VIRTIO_9P_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = {
+ .base_name = TYPE_VIRTIO_9P_PCI,
+ .generic_name = "virtio-9p-pci",
+ .transitional_name = "virtio-9p-pci-transitional",
+ .non_transitional_name = "virtio-9p-pci-non-transitional",
.instance_size = sizeof(V9fsPCIState),
.instance_init = virtio_9p_pci_instance_init,
.class_init = virtio_9p_pci_class_init,
@@ -1877,9 +1879,6 @@ static void virtio_pci_reset(DeviceState *qdev)
static Property virtio_pci_properties[] = {
DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
- DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
- ON_OFF_AUTO_AUTO),
- DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
@@ -1939,13 +1938,123 @@ static const TypeInfo virtio_pci_info = {
.class_init = virtio_pci_class_init,
.class_size = sizeof(VirtioPCIClass),
.abstract = true,
- .interfaces = (InterfaceInfo[]) {
- { INTERFACE_PCIE_DEVICE },
- { INTERFACE_CONVENTIONAL_PCI_DEVICE },
- { }
- },
};
+static Property virtio_pci_generic_properties[] = {
+ DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
+{
+ const VirtioPCIDeviceTypeInfo *t = data;
+ if (t->class_init) {
+ t->class_init(klass, NULL);
+ }
+}
+
+static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->props = virtio_pci_generic_properties;
+}
+
+/* Used when the generic type and the base type are the same */
+static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data)
+{
+ virtio_pci_base_class_init(klass, data);
+ virtio_pci_generic_class_init(klass, NULL);
+}
+
+static void virtio_pci_transitional_instance_init(Object *obj)
+{
+ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
+
+ proxy->disable_legacy = ON_OFF_AUTO_OFF;
+ proxy->disable_modern = false;
+}
+
+static void virtio_pci_non_transitional_instance_init(Object *obj)
+{
+ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
+
+ proxy->disable_legacy = ON_OFF_AUTO_ON;
+ proxy->disable_modern = false;
+}
+
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
+{
+ TypeInfo base_type_info = {
+ .name = t->base_name,
+ .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI,
+ .instance_size = t->instance_size,
+ .instance_init = t->instance_init,
+ .class_init = virtio_pci_base_class_init,
+ .class_data = (void *)t,
+ .abstract = true,
+ };
+ TypeInfo generic_type_info = {
+ .name = t->generic_name,
+ .parent = base_type_info.name,
+ .class_init = virtio_pci_generic_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { }
+ },
+ };
+
+ if (!base_type_info.name) {
+ /* No base type -> register a single generic device type */
+ base_type_info.name = t->generic_name;
+ base_type_info.class_init = virtio_pci_generic_base_class_init;
+ base_type_info.interfaces = generic_type_info.interfaces;
+ base_type_info.abstract = false;
+ generic_type_info.name = NULL;
+ assert(!t->non_transitional_name);
+ assert(!t->transitional_name);
+ }
+
+ type_register(&base_type_info);
+ if (generic_type_info.name) {
+ type_register(&generic_type_info);
+ }
+
+ if (t->non_transitional_name) {
+ const TypeInfo non_transitional_type_info = {
+ .name = t->non_transitional_name,
+ .parent = base_type_info.name,
+ .instance_init = virtio_pci_non_transitional_instance_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { }
+ },
+ };
+ type_register(&non_transitional_type_info);
+ }
+
+ if (t->transitional_name) {
+ const TypeInfo transitional_type_info = {
+ .name = t->transitional_name,
+ .parent = base_type_info.name,
+ .instance_init = virtio_pci_transitional_instance_init,
+ .interfaces = (InterfaceInfo[]) {
+ /*
+ * Transitional virtio devices work only as Conventional PCI
+ * devices because they require PIO ports.
+ */
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { }
+ },
+ };
+ type_register(&transitional_type_info);
+ }
+}
+
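For a fully converted device, virtio_pci_types_register() therefore creates up to four QOM types; sketched as a hierarchy, with the interfaces each level picks up:

    /*
     * TYPE_VIRTIO_PCI (abstract)
     *  `-- <base_name>                 abstract, no PCI interfaces; carries
     *                                  instance_size/instance_init/class_init
     *       |-- <generic_name>             PCIe + Conventional PCI,
     *       |                              disable-legacy/disable-modern props
     *       |-- <transitional_name>        Conventional PCI only (legacy needs PIO)
     *       `-- <non_transitional_name>    PCIe + Conventional PCI, modern only
     *
     * When base_name is NULL, generic_name alone is registered and doubles as
     * the (non-abstract) base type.
     */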
/* virtio-blk-pci */
static Property virtio_blk_pci_properties[] = {
@@ -1995,9 +2104,11 @@ static void virtio_blk_pci_instance_init(Object *obj)
"bootindex", &error_abort);
}
-static const TypeInfo virtio_blk_pci_info = {
- .name = TYPE_VIRTIO_BLK_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = {
+ .base_name = TYPE_VIRTIO_BLK_PCI,
+ .generic_name = "virtio-blk-pci",
+ .transitional_name = "virtio-blk-pci-transitional",
+ .non_transitional_name = "virtio-blk-pci-non-transitional",
.instance_size = sizeof(VirtIOBlkPCI),
.instance_init = virtio_blk_pci_instance_init,
.class_init = virtio_blk_pci_class_init,
@@ -2051,9 +2162,11 @@ static void vhost_user_blk_pci_instance_init(Object *obj)
"bootindex", &error_abort);
}
-static const TypeInfo vhost_user_blk_pci_info = {
- .name = TYPE_VHOST_USER_BLK_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = {
+ .base_name = TYPE_VHOST_USER_BLK_PCI,
+ .generic_name = "vhost-user-blk-pci",
+ .transitional_name = "vhost-user-blk-pci-transitional",
+ .non_transitional_name = "vhost-user-blk-pci-non-transitional",
.instance_size = sizeof(VHostUserBlkPCI),
.instance_init = vhost_user_blk_pci_instance_init,
.class_init = vhost_user_blk_pci_class_init,
@@ -2119,9 +2232,11 @@ static void virtio_scsi_pci_instance_init(Object *obj)
TYPE_VIRTIO_SCSI);
}
-static const TypeInfo virtio_scsi_pci_info = {
- .name = TYPE_VIRTIO_SCSI_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = {
+ .base_name = TYPE_VIRTIO_SCSI_PCI,
+ .generic_name = "virtio-scsi-pci",
+ .transitional_name = "virtio-scsi-pci-transitional",
+ .non_transitional_name = "virtio-scsi-pci-non-transitional",
.instance_size = sizeof(VirtIOSCSIPCI),
.instance_init = virtio_scsi_pci_instance_init,
.class_init = virtio_scsi_pci_class_init,
@@ -2174,9 +2289,11 @@ static void vhost_scsi_pci_instance_init(Object *obj)
"bootindex", &error_abort);
}
-static const TypeInfo vhost_scsi_pci_info = {
- .name = TYPE_VHOST_SCSI_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = {
+ .base_name = TYPE_VHOST_SCSI_PCI,
+ .generic_name = "vhost-scsi-pci",
+ .transitional_name = "vhost-scsi-pci-transitional",
+ .non_transitional_name = "vhost-scsi-pci-non-transitional",
.instance_size = sizeof(VHostSCSIPCI),
.instance_init = vhost_scsi_pci_instance_init,
.class_init = vhost_scsi_pci_class_init,
@@ -2229,9 +2346,11 @@ static void vhost_user_scsi_pci_instance_init(Object *obj)
"bootindex", &error_abort);
}
-static const TypeInfo vhost_user_scsi_pci_info = {
- .name = TYPE_VHOST_USER_SCSI_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = {
+ .base_name = TYPE_VHOST_USER_SCSI_PCI,
+ .generic_name = "vhost-user-scsi-pci",
+ .transitional_name = "vhost-user-scsi-pci-transitional",
+ .non_transitional_name = "vhost-user-scsi-pci-non-transitional",
.instance_size = sizeof(VHostUserSCSIPCI),
.instance_init = vhost_user_scsi_pci_instance_init,
.class_init = vhost_user_scsi_pci_class_init,
@@ -2277,9 +2396,11 @@ static void vhost_vsock_pci_instance_init(Object *obj)
TYPE_VHOST_VSOCK);
}
-static const TypeInfo vhost_vsock_pci_info = {
- .name = TYPE_VHOST_VSOCK_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = {
+ .base_name = TYPE_VHOST_VSOCK_PCI,
+ .generic_name = "vhost-vsock-pci",
+ .transitional_name = "vhost-vsock-pci-transitional",
+ .non_transitional_name = "vhost-vsock-pci-non-transitional",
.instance_size = sizeof(VHostVSockPCI),
.instance_init = vhost_vsock_pci_instance_init,
.class_init = vhost_vsock_pci_class_init,
@@ -2334,9 +2455,11 @@ static void virtio_balloon_pci_instance_init(Object *obj)
"guest-stats-polling-interval", &error_abort);
}
-static const TypeInfo virtio_balloon_pci_info = {
- .name = TYPE_VIRTIO_BALLOON_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = {
+ .base_name = TYPE_VIRTIO_BALLOON_PCI,
+ .generic_name = "virtio-balloon-pci",
+ .transitional_name = "virtio-balloon-pci-transitional",
+ .non_transitional_name = "virtio-balloon-pci-non-transitional",
.instance_size = sizeof(VirtIOBalloonPCI),
.instance_init = virtio_balloon_pci_instance_init,
.class_init = virtio_balloon_pci_class_init,
@@ -2407,9 +2530,11 @@ static void virtio_serial_pci_instance_init(Object *obj)
TYPE_VIRTIO_SERIAL);
}
-static const TypeInfo virtio_serial_pci_info = {
- .name = TYPE_VIRTIO_SERIAL_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = {
+ .base_name = TYPE_VIRTIO_SERIAL_PCI,
+ .generic_name = "virtio-serial-pci",
+ .transitional_name = "virtio-serial-pci-transitional",
+ .non_transitional_name = "virtio-serial-pci-non-transitional",
.instance_size = sizeof(VirtIOSerialPCI),
.instance_init = virtio_serial_pci_instance_init,
.class_init = virtio_serial_pci_class_init,
@@ -2462,9 +2587,11 @@ static void virtio_net_pci_instance_init(Object *obj)
"bootindex", &error_abort);
}
-static const TypeInfo virtio_net_pci_info = {
- .name = TYPE_VIRTIO_NET_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = {
+ .base_name = TYPE_VIRTIO_NET_PCI,
+ .generic_name = "virtio-net-pci",
+ .transitional_name = "virtio-net-pci-transitional",
+ .non_transitional_name = "virtio-net-pci-non-transitional",
.instance_size = sizeof(VirtIONetPCI),
.instance_init = virtio_net_pci_instance_init,
.class_init = virtio_net_pci_class_init,
@@ -2513,9 +2640,11 @@ static void virtio_rng_initfn(Object *obj)
TYPE_VIRTIO_RNG);
}
-static const TypeInfo virtio_rng_pci_info = {
- .name = TYPE_VIRTIO_RNG_PCI,
- .parent = TYPE_VIRTIO_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = {
+ .base_name = TYPE_VIRTIO_RNG_PCI,
+ .generic_name = "virtio-rng-pci",
+ .transitional_name = "virtio-rng-pci-transitional",
+ .non_transitional_name = "virtio-rng-pci-non-transitional",
.instance_size = sizeof(VirtIORngPCI),
.instance_init = virtio_rng_initfn,
.class_init = virtio_rng_pci_class_init,
@@ -2605,24 +2734,24 @@ static const TypeInfo virtio_input_hid_pci_info = {
.abstract = true,
};
-static const TypeInfo virtio_keyboard_pci_info = {
- .name = TYPE_VIRTIO_KEYBOARD_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = {
+ .generic_name = TYPE_VIRTIO_KEYBOARD_PCI,
.parent = TYPE_VIRTIO_INPUT_HID_PCI,
.class_init = virtio_input_hid_kbd_pci_class_init,
.instance_size = sizeof(VirtIOInputHIDPCI),
.instance_init = virtio_keyboard_initfn,
};
-static const TypeInfo virtio_mouse_pci_info = {
- .name = TYPE_VIRTIO_MOUSE_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = {
+ .generic_name = TYPE_VIRTIO_MOUSE_PCI,
.parent = TYPE_VIRTIO_INPUT_HID_PCI,
.class_init = virtio_input_hid_mouse_pci_class_init,
.instance_size = sizeof(VirtIOInputHIDPCI),
.instance_init = virtio_mouse_initfn,
};
-static const TypeInfo virtio_tablet_pci_info = {
- .name = TYPE_VIRTIO_TABLET_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = {
+ .generic_name = TYPE_VIRTIO_TABLET_PCI,
.parent = TYPE_VIRTIO_INPUT_HID_PCI,
.instance_size = sizeof(VirtIOInputHIDPCI),
.instance_init = virtio_tablet_initfn,
@@ -2637,8 +2766,11 @@ static void virtio_host_initfn(Object *obj)
TYPE_VIRTIO_INPUT_HOST);
}
-static const TypeInfo virtio_host_pci_info = {
- .name = TYPE_VIRTIO_INPUT_HOST_PCI,
+static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = {
+ .base_name = TYPE_VIRTIO_INPUT_HOST_PCI,
+ .generic_name = "virtio-input-host-pci",
+ .transitional_name = "virtio-input-host-pci-transitional",
+ .non_transitional_name = "virtio-input-host-pci-non-transitional",
.parent = TYPE_VIRTIO_INPUT_PCI,
.instance_size = sizeof(VirtIOInputHostPCI),
.instance_init = virtio_host_initfn,
@@ -2692,36 +2824,39 @@ static const TypeInfo virtio_pci_bus_info = {
static void virtio_pci_register_types(void)
{
- type_register_static(&virtio_rng_pci_info);
+ /* Base types: */
+ type_register_static(&virtio_pci_bus_info);
+ type_register_static(&virtio_pci_info);
type_register_static(&virtio_input_pci_info);
type_register_static(&virtio_input_hid_pci_info);
- type_register_static(&virtio_keyboard_pci_info);
- type_register_static(&virtio_mouse_pci_info);
- type_register_static(&virtio_tablet_pci_info);
+
+ /* Implementations: */
+ virtio_pci_types_register(&virtio_rng_pci_info);
+ virtio_pci_types_register(&virtio_keyboard_pci_info);
+ virtio_pci_types_register(&virtio_mouse_pci_info);
+ virtio_pci_types_register(&virtio_tablet_pci_info);
#ifdef CONFIG_LINUX
- type_register_static(&virtio_host_pci_info);
+ virtio_pci_types_register(&virtio_host_pci_info);
#endif
- type_register_static(&virtio_pci_bus_info);
- type_register_static(&virtio_pci_info);
#ifdef CONFIG_VIRTFS
- type_register_static(&virtio_9p_pci_info);
+ virtio_pci_types_register(&virtio_9p_pci_info);
#endif
- type_register_static(&virtio_blk_pci_info);
+ virtio_pci_types_register(&virtio_blk_pci_info);
#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
- type_register_static(&vhost_user_blk_pci_info);
+ virtio_pci_types_register(&vhost_user_blk_pci_info);
#endif
- type_register_static(&virtio_scsi_pci_info);
- type_register_static(&virtio_balloon_pci_info);
- type_register_static(&virtio_serial_pci_info);
- type_register_static(&virtio_net_pci_info);
+ virtio_pci_types_register(&virtio_scsi_pci_info);
+ virtio_pci_types_register(&virtio_balloon_pci_info);
+ virtio_pci_types_register(&virtio_serial_pci_info);
+ virtio_pci_types_register(&virtio_net_pci_info);
#ifdef CONFIG_VHOST_SCSI
- type_register_static(&vhost_scsi_pci_info);
+ virtio_pci_types_register(&vhost_scsi_pci_info);
#endif
#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
- type_register_static(&vhost_user_scsi_pci_info);
+ virtio_pci_types_register(&vhost_user_scsi_pci_info);
#endif
#ifdef CONFIG_VHOST_VSOCK
- type_register_static(&vhost_vsock_pci_info);
+ virtio_pci_types_register(&vhost_vsock_pci_info);
#endif
}
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 813082b0d7..29b4216107 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
/*
* virtio-scsi-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci"
+#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base"
#define VIRTIO_SCSI_PCI(obj) \
OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI)
@@ -229,7 +229,7 @@ struct VirtIOSCSIPCI {
/*
* vhost-scsi-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci"
+#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base"
#define VHOST_SCSI_PCI(obj) \
OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI)
@@ -239,7 +239,7 @@ struct VHostSCSIPCI {
};
#endif
-#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci"
+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base"
#define VHOST_USER_SCSI_PCI(obj) \
OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI)
@@ -252,7 +252,7 @@ struct VHostUserSCSIPCI {
/*
* vhost-user-blk-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci"
+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base"
#define VHOST_USER_BLK_PCI(obj) \
OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI)
@@ -265,7 +265,7 @@ struct VHostUserBlkPCI {
/*
* virtio-blk-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci"
+#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base"
#define VIRTIO_BLK_PCI(obj) \
OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI)
@@ -277,7 +277,7 @@ struct VirtIOBlkPCI {
/*
* virtio-balloon-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci"
+#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base"
#define VIRTIO_BALLOON_PCI(obj) \
OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI)
@@ -289,7 +289,7 @@ struct VirtIOBalloonPCI {
/*
* virtio-serial-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci"
+#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base"
#define VIRTIO_SERIAL_PCI(obj) \
OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI)
@@ -301,7 +301,7 @@ struct VirtIOSerialPCI {
/*
* virtio-net-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_NET_PCI "virtio-net-pci"
+#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base"
#define VIRTIO_NET_PCI(obj) \
OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI)
@@ -316,7 +316,7 @@ struct VirtIONetPCI {
#ifdef CONFIG_VIRTFS
-#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci"
+#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base"
#define VIRTIO_9P_PCI(obj) \
OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI)
@@ -330,7 +330,7 @@ typedef struct V9fsPCIState {
/*
* virtio-rng-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci"
+#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base"
#define VIRTIO_RNG_PCI(obj) \
OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI)
@@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI {
#ifdef CONFIG_LINUX
-#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci"
+#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base"
#define VIRTIO_INPUT_HOST_PCI(obj) \
OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI)
@@ -392,7 +392,7 @@ struct VirtIOGPUPCI {
/*
* vhost-vsock-pci: This extends VirtioPCIProxy.
*/
-#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci"
+#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base"
#define VHOST_VSOCK_PCI(obj) \
OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI)
@@ -417,4 +417,58 @@ struct VirtIOCryptoPCI {
/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0
+/* Input for virtio_pci_types_register() */
+typedef struct VirtioPCIDeviceTypeInfo {
+ /*
+ * Common base class for the subclasses below.
+ *
+ * Required only if transitional_name or non_transitional_name is set.
+ *
+ * We need a separate base type instead of making all types
+ * inherit from generic_name for two reasons:
+ * 1) generic_name implements INTERFACE_PCIE_DEVICE, but
+ * transitional_name does not.
+ * 2) generic_name has the "disable-legacy" and "disable-modern"
+ * properties; transitional_name and non_transitional_name don't.
+ */
+ const char *base_name;
+ /*
+ * Generic device type. Optional.
+ *
+ * Supports both transitional and non-transitional modes,
+ * using the disable-legacy and disable-modern properties.
+ * If disable-legacy=auto, (non-)transitional mode is selected
+ * depending on the bus the device is plugged into.
+ *
+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
+ * but PCI Express is supported only in non-transitional mode.
+ *
+ * The only type implemented by QEMU 3.1 and older.
+ */
+ const char *generic_name;
+ /*
+ * The transitional device type. Optional.
+ *
+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
+ */
+ const char *transitional_name;
+ /*
+ * The non-transitional device type. Optional.
+ *
+ * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only.
+ */
+ const char *non_transitional_name;
+
+ /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */
+ const char *parent;
+
+ /* Same as TypeInfo fields: */
+ size_t instance_size;
+ void (*instance_init)(Object *obj);
+ void (*class_init)(ObjectClass *klass, void *data);
+} VirtioPCIDeviceTypeInfo;
+
+/* Register virtio-pci type(s). @t must be static. */
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
+
#endif
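Putting the pieces together, converting a device means renaming its old user-visible type macro to the "-base" spelling above and registering through the new struct; a hypothetical conversion following the same shape as the devices touched by this patch (all "foo" names are illustrative):

    #define TYPE_VIRTIO_FOO_PCI "virtio-foo-pci-base"   /* hypothetical */

    static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
        .base_name             = TYPE_VIRTIO_FOO_PCI,
        .generic_name          = "virtio-foo-pci",
        .transitional_name     = "virtio-foo-pci-transitional",
        .non_transitional_name = "virtio-foo-pci-non-transitional",
        .instance_size = sizeof(VirtIOFooPCI),              /* hypothetical state struct */
        .instance_init = virtio_foo_pci_instance_init,      /* hypothetical */
        .class_init    = virtio_foo_pci_class_init,         /* hypothetical */
    };

    static void virtio_foo_pci_register_types(void)
    {
        virtio_pci_types_register(&virtio_foo_pci_info);
    }
    type_init(virtio_foo_pci_register_types)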