diff options
52 files changed, 1303 insertions, 452 deletions
diff --git a/block/parallels-ext.c b/block/parallels-ext.c index 8a109f005a..4d8ecf5047 100644 --- a/block/parallels-ext.c +++ b/block/parallels-ext.c @@ -130,7 +130,7 @@ static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs, g_autofree uint64_t *l1_table = NULL; BdrvDirtyBitmap *bitmap; QemuUUID uuid; - char uuidstr[UUID_FMT_LEN + 1]; + char uuidstr[UUID_STR_LEN]; int i; if (data_size < sizeof(bf)) { diff --git a/block/vdi.c b/block/vdi.c index c647d72895..7cfd12b50d 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -239,7 +239,7 @@ static void vdi_header_to_le(VdiHeader *header) static void vdi_header_print(VdiHeader *header) { - char uuidstr[37]; + char uuidstr[UUID_STR_LEN]; QemuUUID uuid; logout("text %s", header->text); logout("signature 0x%08x\n", header->signature); diff --git a/docs/devel/index-api.rst b/docs/devel/index-api.rst index 539ad29c21..fe01b2b488 100644 --- a/docs/devel/index-api.rst +++ b/docs/devel/index-api.rst @@ -11,6 +11,7 @@ generated from in-code annotations to function prototypes. loads-stores memory modules + pci qom-api qdev-api ui diff --git a/docs/devel/pci.rst b/docs/devel/pci.rst new file mode 100644 index 0000000000..68739334f3 --- /dev/null +++ b/docs/devel/pci.rst @@ -0,0 +1,8 @@ +============= +PCI subsystem +============= + +API Reference +------------- + +.. kernel-doc:: include/hw/pci/pci.h diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 49a80550c5..e8711ae16a 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -738,6 +738,10 @@ static AddressSpace *typhoon_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &s->pchip.iommu_as; } +static const PCIIOMMUOps typhoon_iommu_ops = { + .get_address_space = typhoon_pci_dma_iommu, +}; + static void typhoon_set_irq(void *opaque, int irq, int level) { TyphoonState *s = opaque; @@ -897,7 +901,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, qemu_irq *p_isa_irq, "iommu-typhoon", UINT64_MAX); address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s->pchip.iommu), "pchip0-pci"); - pci_setup_iommu(b, typhoon_pci_dma_iommu, s); + pci_setup_iommu(b, &typhoon_iommu_ops, s); /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB. */ memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops, diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index f35ae9aa22..9a8ac45431 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -605,6 +605,10 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) return &sdev->as; } +static const PCIIOMMUOps smmu_ops = { + .get_address_space = smmu_find_add_as, +}; + IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) { uint8_t bus_n, devfn; @@ -661,7 +665,7 @@ static void smmu_base_realize(DeviceState *dev, Error **errp) s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL); if (s->primary_bus) { - pci_setup_iommu(s->primary_bus, smmu_find_add_as, s); + pci_setup_iommu(s->primary_bus, &smmu_ops, s); } else { error_setg(errp, "SMMU is not attached to any PCI bus!"); } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 2f1dbb3fd7..b46d16cd2c 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -705,7 +705,7 @@ static void get_reserved_region(Object *obj, Visitor *v, const char *name, int rc; rc = snprintf(buffer, sizeof(buffer), "0x%"PRIx64":0x%"PRIx64":%u", - rr->low, rr->high, rr->type); + range_lob(&rr->range), range_upb(&rr->range), rr->type); assert(rc < sizeof(buffer)); visit_type_str(v, name, &p, errp); @@ -717,6 +717,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, Property *prop = opaque; ReservedRegion *rr = object_field_prop_ptr(obj, prop); const char *endptr; + uint64_t lob, upb; char *str; int ret; @@ -724,7 +725,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, return; } - ret = qemu_strtou64(str, &endptr, 16, &rr->low); + ret = qemu_strtou64(str, &endptr, 16, &lob); if (ret) { error_setg(errp, "start address of '%s'" " must be a hexadecimal integer", name); @@ -734,7 +735,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, goto separator_error; } - ret = qemu_strtou64(endptr + 1, &endptr, 16, &rr->high); + ret = qemu_strtou64(endptr + 1, &endptr, 16, &upb); if (ret) { error_setg(errp, "end address of '%s'" " must be a hexadecimal integer", name); @@ -744,6 +745,8 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, goto separator_error; } + range_set_bounds(&rr->range, lob, upb); + ret = qemu_strtoui(endptr + 1, &endptr, 10, &rr->type); if (ret) { error_setg(errp, "type of '%s'" @@ -1111,7 +1114,7 @@ static void get_uuid(Object *obj, Visitor *v, const char *name, void *opaque, { Property *prop = opaque; QemuUUID *uuid = object_field_prop_ptr(obj, prop); - char buffer[UUID_FMT_LEN + 1]; + char buffer[UUID_STR_LEN]; char *p = buffer; qemu_uuid_unparse(uuid, buffer); diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 271289f902..c64eaa5a46 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -2271,7 +2271,7 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp) VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); BusChild *child; Error *err = NULL; - char idstr[UUID_FMT_LEN + 1]; + char idstr[UUID_STR_LEN]; assert(!qemu_uuid_is_null(&vdev->instanceid)); @@ -2467,7 +2467,7 @@ static char *vmbus_get_dev_path(DeviceState *dev) static char *vmbus_get_fw_dev_path(DeviceState *dev) { VMBusDevice *vdev = VMBUS_DEVICE(dev); - char uuid[UUID_FMT_LEN + 1]; + char uuid[UUID_STR_LEN]; qemu_uuid_unparse(&vdev->instanceid, uuid); return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 7965415b47..4203144da9 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1450,6 +1450,10 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &iommu_as[devfn]->as; } +static const PCIIOMMUOps amdvi_iommu_ops = { + .get_address_space = amdvi_host_dma_iommu, +}; + static const MemoryRegionOps mmio_mem_ops = { .read = amdvi_mmio_read, .write = amdvi_mmio_write, @@ -1581,7 +1585,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) AMDVI_MMIO_SIZE); memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, &s->mmio); - pci_setup_iommu(bus, amdvi_host_dma_iommu, s); + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 1c6c18622f..a6f1a7aa52 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -4088,6 +4088,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &vtd_as->as; } +static PCIIOMMUOps vtd_iommu_ops = { + .get_address_space = vtd_host_dma_iommu, +}; + static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); @@ -4210,7 +4214,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); vtd_init(s); - pci_setup_iommu(bus, vtd_host_dma_iommu, dev); + pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. */ x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); qemu_add_machine_init_done_notifier(&vtd_machine_done_notify); diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c index 4b2d7caf2d..84e0ca14ac 100644 --- a/hw/pci-host/astro.c +++ b/hw/pci-host/astro.c @@ -345,6 +345,10 @@ static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->astro->iommu_as; } +static const PCIIOMMUOps elroy_pcihost_iommu_ops = { + .get_address_space = elroy_pcihost_set_iommu, +}; + /* * Encoding in IOSAPIC: * base_addr == 0xfffa0000, we want to get 0xa0ff0000. @@ -834,7 +838,7 @@ static void astro_realize(DeviceState *obj, Error **errp) &elroy->pci_io); /* Host memory as seen from the PCI side, via the IOMMU. */ - pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu, + pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, &elroy_pcihost_iommu_ops, elroy); } } diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c index 6f5442f108..f477f97847 100644 --- a/hw/pci-host/designware.c +++ b/hw/pci-host/designware.c @@ -663,6 +663,10 @@ static AddressSpace *designware_pcie_host_set_iommu(PCIBus *bus, void *opaque, return &s->pci.address_space; } +static const PCIIOMMUOps designware_iommu_ops = { + .get_address_space = designware_pcie_host_set_iommu, +}; + static void designware_pcie_host_realize(DeviceState *dev, Error **errp) { PCIHostState *pci = PCI_HOST_BRIDGE(dev); @@ -705,7 +709,7 @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp) address_space_init(&s->pci.address_space, &s->pci.address_space_root, "pcie-bus-address-space"); - pci_setup_iommu(pci->bus, designware_pcie_host_set_iommu, s); + pci_setup_iommu(pci->bus, &designware_iommu_ops, s); qdev_realize(DEVICE(&s->root), BUS(pci->bus), &error_fatal); } diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c index 82503229fa..5b0947a16c 100644 --- a/hw/pci-host/dino.c +++ b/hw/pci-host/dino.c @@ -354,6 +354,10 @@ static AddressSpace *dino_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps dino_iommu_ops = { + .get_address_space = dino_pcihost_set_iommu, +}; + /* * Dino interrupts are connected as shown on Page 78, Table 23 * (Little-endian bit numbers) @@ -481,7 +485,7 @@ static void dino_pcihost_init(Object *obj) g_free(name); } - pci_setup_iommu(phb->bus, dino_pcihost_set_iommu, s); + pci_setup_iommu(phb->bus, &dino_iommu_ops, s); sysbus_init_mmio(sbd, &s->this_mem); diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index c5e58f4086..2a74dbe45f 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -968,6 +968,10 @@ static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &ds->dma_as; } +static PCIIOMMUOps pnv_phb3_iommu_ops = { + .get_address_space = pnv_phb3_dma_iommu, +}; + static void pnv_phb3_instance_init(Object *obj) { PnvPHB3 *phb = PNV_PHB3(obj); @@ -1012,7 +1016,7 @@ void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb) object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id, &error_abort); - pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); + pci_setup_iommu(pci->bus, &pnv_phb3_iommu_ops, phb); } static void pnv_phb3_realize(DeviceState *dev, Error **errp) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 29cb11a5d9..37c7afc18c 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1518,6 +1518,10 @@ static void pnv_phb4_xscom_realize(PnvPHB4 *phb) &phb->phb_regs_mr); } +static PCIIOMMUOps pnv_phb4_iommu_ops = { + .get_address_space = pnv_phb4_dma_iommu, +}; + static void pnv_phb4_instance_init(Object *obj) { PnvPHB4 *phb = PNV_PHB4(obj); @@ -1557,7 +1561,7 @@ void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb) object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id, &error_abort); - pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); + pci_setup_iommu(pci->bus, &pnv_phb4_iommu_ops, phb); pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE; } diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 38814247f2..453a4e6ed3 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -435,6 +435,10 @@ static AddressSpace *e500_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps ppce500_iommu_ops = { + .get_address_space = e500_pcihost_set_iommu, +}; + static void e500_pcihost_realize(DeviceState *dev, Error **errp) { SysBusDevice *sbd = SYS_BUS_DEVICE(dev); @@ -469,7 +473,7 @@ static void e500_pcihost_realize(DeviceState *dev, Error **errp) memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX); memory_region_add_subregion(&s->bm, 0x0, &s->busmem); address_space_init(&s->bm_as, &s->bm, "pci-bm"); - pci_setup_iommu(b, e500_pcihost_set_iommu, s); + pci_setup_iommu(b, &ppce500_iommu_ops, s); pci_create_simple(b, 0, "e500-host-bridge"); diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 9a11ac4b2b..86c3a49087 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -223,6 +223,10 @@ static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps raven_iommu_ops = { + .get_address_space = raven_pcihost_set_iommu, +}; + static void raven_change_gpio(void *opaque, int n, int level) { PREPPCIState *s = opaque; @@ -320,7 +324,7 @@ static void raven_pcihost_initfn(Object *obj) memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); address_space_init(&s->bm_as, &s->bm, "raven-bm"); - pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); + pci_setup_iommu(&s->pci_bus, &raven_iommu_ops, s); h->bus = &s->pci_bus; diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index dcb2e230b6..d0851b48b0 100644 --- a/hw/pci-host/sabre.c +++ b/hw/pci-host/sabre.c @@ -112,6 +112,10 @@ static AddressSpace *sabre_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +static const PCIIOMMUOps sabre_iommu_ops = { + .get_address_space = sabre_pci_dma_iommu, +}; + static void sabre_config_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { @@ -384,7 +388,7 @@ static void sabre_realize(DeviceState *dev, Error **errp) /* IOMMU */ memory_region_add_subregion_overlap(&s->sabre_config, 0x200, sysbus_mmio_get_region(SYS_BUS_DEVICE(s->iommu), 0), 1); - pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu); + pci_setup_iommu(phb->bus, &sabre_iommu_ops, s->iommu); /* APB secondary busses */ pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_SIMBA_PCI_BRIDGE); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 885c04b6f5..c49417abb2 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2678,7 +2678,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) PCIBus *iommu_bus = bus; uint8_t devfn = dev->devfn; - while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) { + while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); /* @@ -2717,15 +2717,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { - return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); + if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + return iommu_bus->iommu_ops->get_address_space(bus, + iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) +void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { - bus->iommu_fn = fn; + /* + * If called, pci_setup_iommu() should provide a minimum set of + * useful callbacks for the bus. + */ + assert(ops); + assert(ops->get_address_space); + + bus->iommu_ops = ops; bus->iommu_opaque = opaque; } diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c index 672090de94..df4ee374d0 100644 --- a/hw/ppc/ppc440_pcix.c +++ b/hw/ppc/ppc440_pcix.c @@ -449,6 +449,10 @@ static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn) return &s->bm_as; } +static const PCIIOMMUOps ppc440_iommu_ops = { + .get_address_space = ppc440_pcix_set_iommu, +}; + /* * Some guests on sam460ex write all kinds of garbage here such as * missing enable bit and low bits set and still expect this to work @@ -503,7 +507,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error **errp) memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX); memory_region_add_subregion(&s->bm, 0x0, &s->busmem); address_space_init(&s->bm_as, &s->bm, "pci-bm"); - pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s); + pci_setup_iommu(h->bus, &ppc440_iommu_ops, s); memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops, diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 370c5a90f2..a27024e45a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -780,6 +780,10 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +static const PCIIOMMUOps spapr_iommu_ops = { + .get_address_space = spapr_pci_dma_iommu, +}; + static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { g_autofree char *path = NULL; @@ -1978,7 +1982,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW, &sphb->msiwindow); - pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb); + pci_setup_iommu(bus, &spapr_iommu_ops, sphb); pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 9016720547..f283f7e38d 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -18,14 +18,112 @@ */ #include "qemu/osdep.h" +#include <sys/ioctl.h> #include <linux/vfio.h> #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" #include "hw/pci/pci_device.h" -#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" #include "qemu/error-report.h" +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. + */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return ret; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +static bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +static int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} + bool spapr_phb_eeh_available(SpaprPhbState *sphb) { return vfio_eeh_as_ok(&sphb->iommu_as); diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c index 1391dd712c..7c56aad0fc 100644 --- a/hw/remote/iommu.c +++ b/hw/remote/iommu.c @@ -100,6 +100,10 @@ static void remote_iommu_finalize(Object *obj) iommu->elem_by_devfn = NULL; } +static const PCIIOMMUOps remote_iommu_ops = { + .get_address_space = remote_iommu_find_add_as, +}; + void remote_iommu_setup(PCIBus *pci_bus) { RemoteIommu *iommu = NULL; @@ -108,7 +112,7 @@ void remote_iommu_setup(PCIBus *pci_bus) iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); - pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); + pci_setup_iommu(pci_bus, &remote_iommu_ops, iommu); object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 2ca36f9f3b..347580ebac 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -652,6 +652,10 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &iommu->as; } +static const PCIIOMMUOps s390_iommu_ops = { + .get_address_space = s390_pci_dma_iommu, +}; + static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set) { uint8_t expected, actual; @@ -839,7 +843,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq, NULL, get_system_memory(), get_system_io(), 0, 64, TYPE_PCI_BUS); - pci_setup_iommu(b, s390_pci_dma_iommu, s); + pci_setup_iommu(b, &s390_iommu_ops, s); bus = BUS(b); qbus_set_hotplug_handler(bus, OBJECT(dev)); @@ -1058,7 +1062,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, pdev = PCI_DEVICE(dev); pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq); - pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s); + pci_setup_iommu(&pb->sec_bus, &s390_iommu_ops, s); qbus_set_hotplug_handler(BUS(&pb->sec_bus), OBJECT(s)); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 5f257bffb9..bbf69ff55a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include <linux/vfio.h> #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 6623ae237b..d857bb8d0f 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" #include "hw/s390x/vfio-ccw.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index d806057b40..e70fdf5e0c 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -26,7 +26,6 @@ #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/pci.h" #include "exec/address-spaces.h" #include "exec/memory.h" @@ -246,44 +245,6 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return true; } -void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, - max_iova - min_iova + 1)) { - hw_error("%s: Overlapped IOMMU are not enabled", __func__); - } - } - - hostwin = g_malloc0(sizeof(*hostwin)); - - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; - QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -} - -int vfio_host_win_del(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - return 0; - } - } - - return -1; -} - static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && @@ -532,22 +493,6 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, g_free(vrdl); } -static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, - hwaddr iova, hwaddr end) -{ - VFIOHostDMAWindow *hostwin; - bool hostwin_found = false; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { - hostwin_found = true; - break; - } - } - - return hostwin_found ? hostwin : NULL; -} - static bool vfio_known_safe_misalignment(MemoryRegionSection *section) { MemoryRegion *mr = section->mr; @@ -626,7 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; - VFIOHostDMAWindow *hostwin; Error *err = NULL; if (!vfio_listener_valid_section(section, "region_add")) { @@ -648,13 +592,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - hostwin = vfio_find_hostwin(container, iova, end); - if (!hostwin) { - error_setg(&err, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end); - goto fail; - } - memory_region_ref(section->mr); if (memory_region_is_iommu(section->mr)) { @@ -693,6 +630,15 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } + if (container->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, + container->iova_ranges, &err); + if (ret) { + g_free(giommu); + goto fail; + } + } + ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { @@ -726,7 +672,7 @@ static void vfio_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { trace_vfio_listener_region_add_no_dma_map( @@ -825,12 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_ram_device(section->mr)) { hwaddr pgmask; - VFIOHostDMAWindow *hostwin; - - hostwin = vfio_find_hostwin(container, iova, end); - assert(hostwin); /* or region_add() would have failed */ - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); } else if (memory_region_has_ram_discard_manager(section->mr)) { vfio_unregister_ram_discard_listener(container, section); diff --git a/hw/vfio/container.c b/hw/vfio/container.c index adc467210f..242010036a 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -20,20 +20,15 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#ifdef CONFIG_KVM -#include <linux/kvm.h> -#endif #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "exec/address-spaces.h" #include "exec/memory.h" #include "exec/ram_addr.h" #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" -#include "sysemu/kvm.h" #include "sysemu/reset.h" #include "trace.h" #include "qapi/error.h" @@ -205,92 +200,6 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, return -errno; } -int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, - Error **errp) -{ - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return 0; - } - - /* For now intersections are not allowed, we may relax this later */ - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, - int128_get64(section->size))) { - error_setg(errp, - "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" - "host DMA window [0x%"PRIx64",0x%"PRIx64"]", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, - hostwin->min_iova, hostwin->max_iova); - return -EINVAL; - } - } - - ret = vfio_spapr_create_window(container, section, &pgsize); - if (ret) { - error_setg_errno(errp, -ret, "Failed to create SPAPR window"); - return ret; - } - - vfio_host_win_add(container, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); -#ifdef CONFIG_KVM - if (kvm_enabled()) { - VFIOGroup *group; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - struct kvm_vfio_spapr_tce param; - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, - .addr = (uint64_t)(unsigned long)¶m, - }; - - if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, - ¶m.tablefd)) { - QLIST_FOREACH(group, &container->group_list, container_next) { - param.groupfd = group->fd; - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_setg_errno(errp, errno, - "vfio: failed GROUP_SET_SPAPR_TCE for " - "KVM VFIO device %d and group fd %d", - param.tablefd, param.groupfd); - return -errno; - } - trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); - } - } - } -#endif - return 0; -} - -void vfio_container_del_section_window(VFIOContainer *container, - MemoryRegionSection *section) -{ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } - - vfio_spapr_remove_window(container, - section->offset_within_address_space); - if (vfio_host_win_del(container, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { - hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, - __func__, section->offset_within_address_space); - } -} - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) { int ret; @@ -355,14 +264,6 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, return ret; } -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } -} - static struct vfio_info_cap_header * vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) { @@ -382,7 +283,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, /* If the capability cannot be found, assume no DMA limiting */ hdr = vfio_get_iommu_type1_info_cap(info, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); - if (hdr == NULL) { + if (!hdr) { return false; } @@ -394,6 +295,32 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + VFIOContainer *container) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; + + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + if (!hdr) { + return false; + } + + cap = (void *)hdr; + + for (int i = 0; i < cap->nr_iovas; i++) { + Range *range = g_new(Range, 1); + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); + container->iova_ranges = + range_list_insert(container->iova_ranges, range); + } + + return true; +} + static void vfio_kvm_device_add_group(VFIOGroup *group) { Error *err = NULL; @@ -535,6 +462,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, } } +static void vfio_free_container(VFIOContainer *container) +{ + g_list_free_full(container->iova_ranges, g_free); + g_free(container); +} + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -616,8 +549,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->error = NULL; container->dirty_pages_supported = false; container->dma_max_mappings = 0; + container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); @@ -652,84 +585,21 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { container->dma_max_mappings = 65535; } - vfio_get_iommu_info_migration(container, info); - g_free(info); - /* - * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE - * information to get the actual window extent rather than assume - * a 64-bit IOVA address space. - */ - vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); + vfio_get_info_iova_range(info, container); + vfio_get_iommu_info_migration(container, info); + g_free(info); break; } case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: { - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. - */ - if (!v2) { - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_setg_errno(errp, errno, "failed to enable container"); - ret = -errno; - goto enable_discards_exit; - } - } else { - container->prereg_listener = vfio_prereg_listener; - - memory_listener_register(&container->prereg_listener, - &address_space_memory); - if (container->error) { - memory_listener_unregister(&container->prereg_listener); - ret = -1; - error_propagate_prepend(errp, container->error, - "RAM memory listener initialization failed: "); - goto enable_discards_exit; - } - } - - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + ret = vfio_spapr_container_init(container, errp); if (ret) { - error_setg_errno(errp, errno, - "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); - ret = -errno; - if (v2) { - memory_listener_unregister(&container->prereg_listener); - } goto enable_discards_exit; } - - if (v2) { - container->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. - * To make region_add/del simpler, we better remove this - * window now and let those iommu_listener callbacks - * create/remove them when needed. - */ - ret = vfio_spapr_remove_window(container, info.dma32_window_start); - if (ret) { - error_setg_errno(errp, -ret, - "failed to remove existing window"); - goto enable_discards_exit; - } - } else { - /* The default table uses 4K pages */ - container->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); - } + break; } } @@ -759,13 +629,17 @@ listener_release_exit: QLIST_REMOVE(group, container_next); QLIST_REMOVE(container, next); vfio_kvm_device_del_group(group); - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } enable_discards_exit: vfio_ram_block_discard_disable(container, false); free_container_exit: - g_free(container); + vfio_free_container(container); close_fd_exit: close(fd); @@ -789,7 +663,11 @@ static void vfio_disconnect_container(VFIOGroup *group) * group. */ if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } } if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { @@ -800,7 +678,6 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; - VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -811,15 +688,9 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - } - trace_vfio_disconnect_container(container->fd); close(container->fd); - g_free(container); + vfio_free_container(container); vfio_put_address_space(space); } @@ -975,103 +846,6 @@ static void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } -/* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -static bool vfio_eeh_container_ok(VFIOContainer *container) -{ - /* - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO - * implementation is broken if there are multiple groups in a - * container. The hardware works in units of Partitionable - * Endpoints (== IOMMU groups) and the EEH operations naively - * iterate across all groups in the container, without any logic - * to make sure the groups have their state synchronized. For - * certain operations (ENABLE) that might be ok, until an error - * occurs, but for others (GET_STATE) it's clearly broken. - */ - - /* - * XXX Once fixed kernels exist, test for them here - */ - - if (QLIST_EMPTY(&container->group_list)) { - return false; - } - - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { - return false; - } - - return true; -} - -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) -{ - struct vfio_eeh_pe_op pe_op = { - .argsz = sizeof(pe_op), - .op = op, - }; - int ret; - - if (!vfio_eeh_container_ok(container)) { - error_report("vfio/eeh: EEH_PE_OP 0x%x: " - "kernel requires a container with exactly one group", op); - return -EPERM; - } - - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); - if (ret < 0) { - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); - return -errno; - } - - return ret; -} - -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) -{ - VFIOAddressSpace *space = vfio_get_address_space(as); - VFIOContainer *container = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - - container = QLIST_FIRST(&space->containers); - - if (QLIST_NEXT(container, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ - container = NULL; - goto out; - } - -out: - vfio_put_address_space(space); - return container; -} - -bool vfio_eeh_as_ok(AddressSpace *as) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - return (container != NULL) && vfio_eeh_container_ok(container); -} - -int vfio_eeh_as_op(AddressSpace *as, uint32_t op) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - if (!container) { - return -ENODEV; - } - return vfio_eeh_container_op(container, op); -} - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) { char *tmp, group_path[PATH_MAX], *group_name; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 7e5da21b31..168847e7c5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -23,7 +23,6 @@ #include <sys/ioctl.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/hw.h" #include "trace.h" #include "qapi/error.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index b27011cee7..c62c02f7b6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3081,7 +3081,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) struct stat st; int i, ret; bool is_mdev; - char uuid[UUID_FMT_LEN]; + char uuid[UUID_STR_LEN]; char *name; if (!vbasedev->sysfsdev) { diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 9ec1e95f6d..83da2f7ec2 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -11,6 +11,11 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <linux/vfio.h> +#ifdef CONFIG_KVM +#include <linux/kvm.h> +#endif +#include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "hw/vfio/vfio-common.h" #include "hw/hw.h" @@ -135,15 +140,90 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0); } -const MemoryListener vfio_prereg_listener = { +static const MemoryListener vfio_prereg_listener = { .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, }; -int vfio_spapr_create_window(VFIOContainer *container, - MemoryRegionSection *section, - hwaddr *pgsize) +static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, + max_iova - min_iova + 1)) { + hw_error("%s: Overlapped IOMMU are not enabled", __func__); + } + } + + hostwin = g_malloc0(sizeof(*hostwin)); + + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; + QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); +} + +static int vfio_host_win_del(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + return 0; + } + } + + return -1; +} + +static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, + hwaddr iova, hwaddr end) +{ + VFIOHostDMAWindow *hostwin; + bool hostwin_found = false; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { + hostwin_found = true; + break; + } + } + + return hostwin_found ? hostwin : NULL; +} + +static int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space) +{ + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + .start_addr = offset_within_address_space, + }; + int ret; + + ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + error_report("Failed to remove window at %"PRIx64, + (uint64_t)remove.start_addr); + return -errno; + } + + trace_vfio_spapr_remove_window(offset_within_address_space); + + return 0; +} + +static int vfio_spapr_create_window(VFIOContainer *container, + MemoryRegionSection *section, + hwaddr *pgsize) { int ret = 0; IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); @@ -233,23 +313,195 @@ int vfio_spapr_create_window(VFIOContainer *container, return 0; } -int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space) +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp) { - struct vfio_iommu_spapr_tce_remove remove = { - .argsz = sizeof(remove), - .start_addr = offset_within_address_space, - }; + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; int ret; - ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + /* + * VFIO_SPAPR_TCE_IOMMU supports a single host window between + * [dma32_window_start, dma32_window_size), we need to ensure + * the section fall in this range. + */ + if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + hwaddr iova, end; + + iova = section->offset_within_address_space; + end = iova + int128_get64(section->size) - 1; + + if (!vfio_find_hostwin(container, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); + return -EINVAL; + } + return 0; + } + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return 0; + } + + /* For now intersections are not allowed, we may relax this later */ + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, + int128_get64(section->size))) { + error_setg(errp, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); + return -EINVAL; + } + } + + ret = vfio_spapr_create_window(container, section, &pgsize); if (ret) { - error_report("Failed to remove window at %"PRIx64, - (uint64_t)remove.start_addr); - return -errno; + error_setg_errno(errp, -ret, "Failed to create SPAPR window"); + return ret; } - trace_vfio_spapr_remove_window(offset_within_address_space); + vfio_host_win_add(container, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)¶m, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + ¶m.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_setg_errno(errp, errno, + "vfio: failed GROUP_SET_SPAPR_TCE for " + "KVM VFIO device %d and group fd %d", + param.tablefd, param.groupfd); + return -errno; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif + return 0; +} + +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section) +{ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } + + vfio_spapr_remove_window(container, + section->offset_within_address_space); + if (vfio_host_win_del(container, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { + hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, + __func__, section->offset_within_address_space); + } +} + +int vfio_spapr_container_init(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + + QLIST_INIT(&container->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + if (!v2) { + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_setg_errno(errp, errno, "failed to enable container"); + return -errno; + } + } else { + container->prereg_listener = vfio_prereg_listener; + + memory_listener_register(&container->prereg_listener, + &address_space_memory); + if (container->error) { + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } + } + + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); + ret = -errno; + goto listener_unregister_exit; + } + + if (v2) { + container->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. + * To make region_add/del simpler, we better remove this + * window now and let those iommu_listener callbacks + * create/remove them when needed. + */ + ret = vfio_spapr_remove_window(container, info.dma32_window_start); + if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); + goto listener_unregister_exit; + } + } else { + /* The default table uses 4K pages */ + container->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); + } return 0; + +listener_unregister_exit: + if (v2) { + memory_listener_unregister(&container->prereg_listener); + } + return ret; +} + +void vfio_spapr_container_deinit(VFIOContainer *container) +{ + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 0af7a2886c..637cac4edf 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -135,6 +135,7 @@ virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 +virtio_iommu_host_resv_regions(const char *name, uint32_t index, uint64_t lob, uint64_t upb) "mr=%s host-resv-reg[%d] = [0x%"PRIx64",0x%"PRIx64"]" # virtio-mem.c virtio_mem_send_response(uint16_t type) "type=%" PRIu16 diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 7ef2f9dcdb..9459fbf6ed 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -37,7 +37,7 @@ struct VirtIOIOMMUPCI { static Property virtio_iommu_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI, - vdev.nb_reserved_regions, vdev.reserved_regions, + vdev.nr_prop_resv_regions, vdev.prop_resv_regions, qdev_prop_reserved_region, ReservedRegion), DEFINE_PROP_END_OF_LIST(), }; @@ -54,9 +54,9 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) "for the virtio-iommu-pci device"); return; } - for (int i = 0; i < s->nb_reserved_regions; i++) { - if (s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && - s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) { + for (int i = 0; i < s->nr_prop_resv_regions; i++) { + if (s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && + s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) { error_setg(errp, "reserved region %d has an invalid type", i); error_append_hint(errp, "Valid values are 0 and 1\n"); return; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index be51635895..89fb5767d1 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -20,12 +20,15 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/iov.h" +#include "qemu/range.h" +#include "qemu/reserved-region.h" #include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" +#include "qemu/reserved-region.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -378,6 +381,19 @@ static void virtio_iommu_put_domain(gpointer data) g_free(domain); } +static void add_prop_resv_regions(IOMMUDevice *sdev) +{ + VirtIOIOMMU *s = sdev->viommu; + int i; + + for (i = 0; i < s->nr_prop_resv_regions; i++) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + + *reg = s->prop_resv_regions[i]; + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + } +} + static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, int devfn) { @@ -408,6 +424,7 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); + add_prop_resv_regions(sdev); /* * Build the IOMMU disabled container with aliases to the @@ -444,6 +461,10 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static const PCIIOMMUOps virtio_iommu_ops = { + .get_address_space = virtio_iommu_find_add_as, +}; + static int virtio_iommu_attach(VirtIOIOMMU *s, struct virtio_iommu_req_attach *req) { @@ -624,29 +645,30 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s, return ret; } -static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep, +static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep, uint8_t *buf, size_t free) { struct virtio_iommu_probe_resv_mem prop = {}; size_t size = sizeof(prop), length = size - sizeof(prop.head), total; - int i; - - total = size * s->nb_reserved_regions; + GList *l; + total = size * g_list_length(sdev->resv_regions); if (total > free) { return -ENOSPC; } - for (i = 0; i < s->nb_reserved_regions; i++) { - unsigned subtype = s->reserved_regions[i].type; + for (l = sdev->resv_regions; l; l = l->next) { + ReservedRegion *reg = l->data; + unsigned subtype = reg->type; + Range *range = ®->range; assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED || subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI); prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM); prop.head.length = cpu_to_le16(length); prop.subtype = subtype; - prop.start = cpu_to_le64(s->reserved_regions[i].low); - prop.end = cpu_to_le64(s->reserved_regions[i].high); + prop.start = cpu_to_le64(range_lob(range)); + prop.end = cpu_to_le64(range_upb(range)); memcpy(buf, &prop, size); @@ -666,19 +688,27 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, uint8_t *buf) { uint32_t ep_id = le32_to_cpu(req->endpoint); + IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id); size_t free = VIOMMU_PROBE_SIZE; + IOMMUDevice *sdev; ssize_t count; - if (!virtio_iommu_mr(s, ep_id)) { + if (!iommu_mr) { return VIRTIO_IOMMU_S_NOENT; } - count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free); + sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr); + if (!sdev) { + return -EINVAL; + } + + count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free); if (count < 0) { return VIRTIO_IOMMU_S_INVAL; } buf += count; free -= count; + sdev->probe_done = true; return VIRTIO_IOMMU_S_OK; } @@ -856,7 +886,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, bool bypass_allowed; int granule; bool found; - int i; + GList *l; interval.low = addr; interval.high = addr + 1; @@ -894,10 +924,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, goto unlock; } - for (i = 0; i < s->nb_reserved_regions; i++) { - ReservedRegion *reg = &s->reserved_regions[i]; + for (l = sdev->resv_regions; l; l = l->next) { + ReservedRegion *reg = l->data; - if (addr >= reg->low && addr <= reg->high) { + if (range_contains(®->range, addr)) { switch (reg->type) { case VIRTIO_IOMMU_RESV_MEM_T_MSI: entry.perm = flag; @@ -1131,6 +1161,106 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +/** + * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges + * + * The function turns those into reserved ranges. Once some + * reserved ranges have been set, new reserved regions cannot be + * added outside of the original ones. + * + * @mr: IOMMU MR + * @iova_ranges: list of usable IOVA ranges + * @errp: error handle + */ +static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, + GList *iova_ranges, + Error **errp) +{ + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); + GList *current_ranges = sdev->host_resv_ranges; + GList *l, *tmp, *new_ranges = NULL; + int ret = -EINVAL; + + /* check that each new resv region is included in an existing one */ + if (sdev->host_resv_ranges) { + range_inverse_array(iova_ranges, + &new_ranges, + 0, UINT64_MAX); + + for (tmp = new_ranges; tmp; tmp = tmp->next) { + Range *newr = (Range *)tmp->data; + bool included = false; + + for (l = current_ranges; l; l = l->next) { + Range * r = (Range *)l->data; + + if (range_contains_range(r, newr)) { + included = true; + break; + } + } + if (!included) { + goto error; + } + } + /* all new reserved ranges are included in existing ones */ + ret = 0; + goto out; + } + + if (sdev->probe_done) { + warn_report("%s: Notified about new host reserved regions after probe", + mr->parent_obj.name); + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +error: + error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", + mr->parent_obj.name); +out: + g_list_free_full(new_ranges, g_free); + return ret; +} + static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1206,7 +1336,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); if (s->primary_bus) { - pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s); + pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); } @@ -1426,6 +1556,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; + imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { diff --git a/include/exec/memory.h b/include/exec/memory.h index 9087d02769..831f7c996d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -24,6 +24,7 @@ #include "qemu/bswap.h" #include "qemu/queue.h" #include "qemu/int128.h" +#include "qemu/range.h" #include "qemu/notify.h" #include "qom/object.h" #include "qemu/rcu.h" @@ -79,8 +80,7 @@ extern unsigned int global_dirty_tracking; typedef struct MemoryRegionOps MemoryRegionOps; struct ReservedRegion { - hwaddr low; - hwaddr high; + Range range; unsigned type; }; @@ -527,6 +527,26 @@ struct IOMMUMemoryRegionClass { int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu, uint64_t page_size_mask, Error **errp); + /** + * @iommu_set_iova_ranges: + * + * Propagate information about the usable IOVA ranges for a given IOMMU + * memory region. Used for example to propagate host physical device + * reserved memory region constraints to the virtual IOMMU. + * + * Optional method: if this method is not provided, then the default IOVA + * aperture is used. + * + * @iommu: the IOMMUMemoryRegion + * + * @iova_ranges: list of ordered IOVA ranges (at least one range) + * + * Returns 0 on success, or a negative error. In case of failure, the error + * object must be created. + */ + int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu, + GList *iova_ranges, + Error **errp); }; typedef struct RamDiscardListener RamDiscardListener; @@ -1857,6 +1877,18 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, Error **errp); /** + * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges + * for a given IOMMU MR region + * + * @iommu: IOMMU memory region + * @iova_ranges: list of ordered IOVA ranges (at least one range) + * @errp: pointer to Error*, to store an error if it happens. + */ +int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu, + GList *iova_ranges, + Error **errp); + +/** * memory_region_name: get a memory region's name * * Returns the string that was used to initialize the memory region. diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index ea5aff118b..fa6313aabc 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -363,10 +363,42 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range); void pci_device_deassert_intx(PCIDevice *dev); -typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int); + +/** + * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers + * of a PCIBus + * + * Allows to modify the behavior of some IOMMU operations of the PCI + * framework for a set of devices on a PCI bus. + */ +typedef struct PCIIOMMUOps { + /** + * @get_address_space: get the address space for a set of devices + * on a PCI bus. + * + * Mandatory callback which returns a pointer to an #AddressSpace + * + * @bus: the #PCIBus being accessed. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number + */ + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); +} PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); + +/** + * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus + * + * Let PCI host bridges define specific operations. + * + * @bus: the #PCIBus being updated. + * @ops: the #PCIIOMMUOps + * @opaque: passed to callbacks of the @ops structure. + */ +void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque); pcibus_t pci_bar_address(PCIDevice *d, int reg, uint8_t type, pcibus_t size); diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 5653175957..2261312546 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -33,7 +33,7 @@ enum PCIBusFlags { struct PCIBus { BusState qbus; enum PCIBusFlags flags; - PCIIOMMUFunc iommu_fn; + const PCIIOMMUOps *iommu_ops; void *iommu_opaque; uint8_t devfn_min; uint32_t slot_reserved_mask; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 7780b9073a..a4a22accb9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -99,6 +99,7 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; } VFIOContainer; typedef struct VFIOGuestIOMMU { @@ -206,11 +207,6 @@ typedef struct { hwaddr pages; } VFIOBitmap; -void vfio_host_win_add(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova, - uint64_t iova_pgsizes); -int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova); VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); bool vfio_devices_all_running_and_saving(VFIOContainer *container); @@ -224,11 +220,14 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, hwaddr iova, hwaddr size); +/* SPAPR specific */ int vfio_container_add_section_window(VFIOContainer *container, MemoryRegionSection *section, Error **errp); void vfio_container_del_section_window(VFIOContainer *container, MemoryRegionSection *section); +int vfio_spapr_container_init(VFIOContainer *container, Error **errp); +void vfio_spapr_container_deinit(VFIOContainer *container); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); @@ -288,13 +287,6 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); struct vfio_info_cap_header * vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); #endif -extern const MemoryListener vfio_prereg_listener; - -int vfio_spapr_create_window(VFIOContainer *container, - MemoryRegionSection *section, - hwaddr *pgsize); -int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); void vfio_migration_exit(VFIODevice *vbasedev); diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h deleted file mode 100644 index 86248f5436..0000000000 --- a/include/hw/vfio/vfio.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef HW_VFIO_H -#define HW_VFIO_H - -bool vfio_eeh_as_ok(AddressSpace *as); -int vfio_eeh_as_op(AddressSpace *as, uint32_t op); - -#endif diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index a93fc5383e..781ebaea8f 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -39,6 +39,9 @@ typedef struct IOMMUDevice { AddressSpace as; MemoryRegion root; /* The root container of the device */ MemoryRegion bypass_mr; /* The alias of shared memory MR */ + GList *resv_regions; + GList *host_resv_ranges; + bool probe_done; } IOMMUDevice; typedef struct IOMMUPciBus { @@ -55,8 +58,8 @@ struct VirtIOIOMMU { GHashTable *as_by_busptr; IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; - ReservedRegion *reserved_regions; - uint32_t nb_reserved_regions; + ReservedRegion *prop_resv_regions; + uint32_t nr_prop_resv_regions; GTree *domains; QemuRecMutex mutex; GTree *endpoints; diff --git a/include/qemu/range.h b/include/qemu/range.h index 7e2b1cc447..205e1da76d 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -217,6 +217,20 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* + * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. + * Both @a and @b must not be empty. + */ +int range_compare(Range *a, Range *b); + GList *range_list_insert(GList *list, Range *data); +/* + * Inverse an array of sorted ranges over the [low, high] span, ie. + * original ranges becomes holes in the newly allocated inv_ranges + */ +void range_inverse_array(GList *in_ranges, + GList **out_ranges, + uint64_t low, uint64_t high); + #endif diff --git a/include/qemu/reserved-region.h b/include/qemu/reserved-region.h new file mode 100644 index 0000000000..8e6f0a97e2 --- /dev/null +++ b/include/qemu/reserved-region.h @@ -0,0 +1,32 @@ +/* + * QEMU ReservedRegion helpers + * + * Copyright (c) 2023 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef QEMU_RESERVED_REGION_H +#define QEMU_RESERVED_REGION_H + +#include "exec/memory.h" + +/* + * Insert a new region into a sorted list of reserved regions. In case + * there is overlap with existing regions, the new added region has + * higher priority and replaces the overlapped segment. + */ +GList *resv_region_list_insert(GList *list, ReservedRegion *reg); + +#endif diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h index e24a1099e4..869f84af09 100644 --- a/include/qemu/uuid.h +++ b/include/qemu/uuid.h @@ -78,9 +78,10 @@ typedef struct { "%02hhx%02hhx-" \ "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" -#define UUID_FMT_LEN 36 - #define UUID_NONE "00000000-0000-0000-0000-000000000000" +QEMU_BUILD_BUG_ON(sizeof(UUID_NONE) - 1 != 36); + +#define UUID_STR_LEN sizeof(UUID_NONE) void qemu_uuid_generate(QemuUUID *out); diff --git a/migration/savevm.c b/migration/savevm.c index bc98c2ea6f..eec5503a42 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -471,8 +471,8 @@ static bool vmstate_uuid_needed(void *opaque) static int vmstate_uuid_post_load(void *opaque, int version_id) { SaveState *state = opaque; - char uuid_src[UUID_FMT_LEN + 1]; - char uuid_dst[UUID_FMT_LEN + 1]; + char uuid_src[UUID_STR_LEN]; + char uuid_dst[UUID_STR_LEN]; if (!qemu_uuid_set) { /* diff --git a/system/memory.c b/system/memory.c index 4928f2525d..304fa843ea 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1921,6 +1921,19 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, return ret; } +int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr, + GList *iova_ranges, + Error **errp) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + int ret = 0; + + if (imrc->iommu_set_iova_ranges) { + ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp); + } + return ret; +} + int memory_region_register_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n, Error **errp) { diff --git a/tests/unit/meson.build b/tests/unit/meson.build index f33ae64b8d..e6c51e7a86 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -21,6 +21,7 @@ tests = { 'test-opts-visitor': [testqapi], 'test-visitor-serialization': [testqapi], 'test-bitmap': [], + 'test-resv-mem': [], # all code tested by test-x86-cpuid is inside topology.h 'test-x86-cpuid': [], 'test-cutils': [], diff --git a/tests/unit/test-resv-mem.c b/tests/unit/test-resv-mem.c new file mode 100644 index 0000000000..5963274e2c --- /dev/null +++ b/tests/unit/test-resv-mem.c @@ -0,0 +1,316 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * reserved-region/range.c unit-tests. + * + * Copyright (C) 2023, Red Hat, Inc. + * + * Author: Eric Auger <eric.auger@redhat.com> + */ + +#include "qemu/osdep.h" +#include "qemu/range.h" +#include "exec/memory.h" +#include "qemu/reserved-region.h" + +#define DEBUG 0 + +#if DEBUG +static void print_ranges(const char *prefix, GList *ranges) +{ + GList *l; + int i = 0; + + if (!g_list_length(ranges)) { + printf("%s is void\n", prefix); + return; + } + for (l = ranges; l; l = l->next) { + Range *r = (Range *)l->data; + + printf("%s rev[%i] = [0x%"PRIx64",0x%"PRIx64"]\n", + prefix, i, range_lob(r), range_upb(r)); + i++; + } +} +#endif + +static void compare_ranges(const char *prefix, GList *ranges, + GList *expected) +{ + GList *l, *e; + +#if DEBUG + print_ranges("out", ranges); + print_ranges("expected", expected); +#endif + g_assert_cmpint(g_list_length(ranges), ==, g_list_length(expected)); + for (l = ranges, e = expected; l ; l = l->next, e = e->next) { + Range *r = (Range *)l->data; + Range *er = (Range *)e->data; + + g_assert_true(range_lob(r) == range_lob(er) && + range_upb(r) == range_upb(er)); + } +} + +static GList *insert_sorted_range(GList *list, uint64_t lob, uint64_t upb) +{ + Range *new = g_new0(Range, 1); + + range_set_bounds(new, lob, upb); + return range_list_insert(list, new); +} + +static void reset(GList **in, GList **out, GList **expected) +{ + g_list_free_full(*in, g_free); + g_list_free_full(*out, g_free); + g_list_free_full(*expected, g_free); + *in = NULL; + *out = NULL; + *expected = NULL; +} + +static void +run_range_inverse_array(const char *prefix, GList **in, GList **expected, + uint64_t low, uint64_t high) +{ + GList *out = NULL; + range_inverse_array(*in, &out, low, high); + compare_ranges(prefix, out, *expected); + reset(in, &out, expected); +} + +static void check_range_reverse_array(void) +{ + GList *in = NULL, *expected = NULL; + + /* test 1 */ + + in = insert_sorted_range(in, 0x10000, UINT64_MAX); + expected = insert_sorted_range(expected, 0x0, 0xFFFF); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); + + /* test 2 */ + + in = insert_sorted_range(in, 0x10000, 0xFFFFFFFFFFFF); + expected = insert_sorted_range(expected, 0x0, 0xFFFF); + expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); + + /* test 3 */ + + in = insert_sorted_range(in, 0x0, 0xFFFF); + in = insert_sorted_range(in, 0x10000, 0x2FFFF); + expected = insert_sorted_range(expected, 0x30000, UINT64_MAX); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); + + /* test 4 */ + + in = insert_sorted_range(in, 0x50000, 0x5FFFF); + in = insert_sorted_range(in, 0x60000, 0xFFFFFFFFFFFF); + expected = insert_sorted_range(expected, 0x0, 0x4FFFF); + expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); + + /* test 5 */ + + in = insert_sorted_range(in, 0x0, UINT64_MAX); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); + + /* test 6 */ + in = insert_sorted_range(in, 0x10000, 0x1FFFF); + in = insert_sorted_range(in, 0x30000, 0x6FFFF); + in = insert_sorted_range(in, 0x90000, UINT64_MAX); + expected = insert_sorted_range(expected, 0x0, 0xFFFF); + expected = insert_sorted_range(expected, 0x20000, 0x2FFFF); + expected = insert_sorted_range(expected, 0x70000, 0x8FFFF); + run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX); +} + +static void check_range_reverse_array_low_end(void) +{ + GList *in = NULL, *expected = NULL; + + /* test 1 */ + in = insert_sorted_range(in, 0x0, UINT64_MAX); + run_range_inverse_array("test1", &in, &expected, 0x10000, 0xFFFFFF); + + /* test 2 */ + + in = insert_sorted_range(in, 0x0, 0xFFFF); + in = insert_sorted_range(in, 0x20000, 0x2FFFF); + expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF); + run_range_inverse_array("test2", &in, &expected, 0x40000, 0xFFFFFFFFFFFF); + + /* test 3 */ + in = insert_sorted_range(in, 0x0, 0xFFFF); + in = insert_sorted_range(in, 0x20000, 0x2FFFF); + in = insert_sorted_range(in, 0x1000000000000, UINT64_MAX); + expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF); + run_range_inverse_array("test3", &in, &expected, 0x40000, 0xFFFFFFFFFFFF); + + /* test 4 */ + + in = insert_sorted_range(in, 0x0, 0xFFFF); + in = insert_sorted_range(in, 0x20000, 0x2FFFF); + in = insert_sorted_range(in, 0x1000000000000, UINT64_MAX); + expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFFFFFF); + run_range_inverse_array("test4", &in, &expected, 0x20000, 0xFFFFFFFFFFFF); + + /* test 5 */ + + in = insert_sorted_range(in, 0x2000, 0xFFFF); + in = insert_sorted_range(in, 0x20000, 0x2FFFF); + in = insert_sorted_range(in, 0x100000000, 0x1FFFFFFFF); + expected = insert_sorted_range(expected, 0x1000, 0x1FFF); + expected = insert_sorted_range(expected, 0x10000, 0x1FFFF); + expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFF); + expected = insert_sorted_range(expected, 0x200000000, 0xFFFFFFFFFFFF); + run_range_inverse_array("test5", &in, &expected, 0x1000, 0xFFFFFFFFFFFF); + + /* test 6 */ + + in = insert_sorted_range(in, 0x10000000 , 0x1FFFFFFF); + in = insert_sorted_range(in, 0x100000000, 0x1FFFFFFFF); + expected = insert_sorted_range(expected, 0x0, 0xFFFF); + run_range_inverse_array("test6", &in, &expected, 0x0, 0xFFFF); +} + +static ReservedRegion *alloc_resv_mem(unsigned type, uint64_t lob, uint64_t upb) +{ + ReservedRegion *r; + + r = g_new0(ReservedRegion, 1); + r->type = type; + range_set_bounds(&r->range, lob, upb); + return r; +} + +static void print_resv_region_list(const char *prefix, GList *list, + uint32_t expected_length) +{ + int i = g_list_length(list); + + g_assert_cmpint(i, ==, expected_length); +#if DEBUG + i = 0; + for (GList *l = list; l; l = l->next) { + ReservedRegion *r = (ReservedRegion *)l->data; + Range *range = &r->range; + + printf("%s item[%d]=[0x%x, 0x%"PRIx64", 0x%"PRIx64"]\n", + prefix, i++, r->type, range_lob(range), range_upb(range)); + } +#endif +} + +static void free_resv_region(gpointer data) +{ + ReservedRegion *reg = (ReservedRegion *)data; + + g_free(reg); +} + +static void check_resv_region_list_insert(void) +{ + ReservedRegion *r[10]; + GList *l = NULL; + + r[0] = alloc_resv_mem(0xA, 0, 0xFFFF); + r[1] = alloc_resv_mem(0xA, 0x20000, 0x2FFFF); + l = resv_region_list_insert(l, r[0]); + l = resv_region_list_insert(l, r[1]); + print_resv_region_list("test1", l, 2); + + /* adjacent on left */ + r[2] = alloc_resv_mem(0xB, 0x0, 0xFFF); + l = resv_region_list_insert(l, r[2]); + /* adjacent on right */ + r[3] = alloc_resv_mem(0xC, 0x21000, 0x2FFFF); + l = resv_region_list_insert(l, r[3]); + print_resv_region_list("test2", l, 4); + + /* exact overlap of D into C*/ + r[4] = alloc_resv_mem(0xD, 0x21000, 0x2FFFF); + l = resv_region_list_insert(l, r[4]); + print_resv_region_list("test3", l, 4); + + /* in the middle */ + r[5] = alloc_resv_mem(0xE, 0x22000, 0x23FFF); + l = resv_region_list_insert(l, r[5]); + print_resv_region_list("test4", l, 6); + + /* overwrites several existing ones */ + r[6] = alloc_resv_mem(0xF, 0x10000, 0x2FFFF); + l = resv_region_list_insert(l, r[6]); + print_resv_region_list("test5", l, 3); + + /* contiguous at the end */ + r[7] = alloc_resv_mem(0x0, 0x30000, 0x40000); + l = resv_region_list_insert(l, r[7]); + print_resv_region_list("test6", l, 4); + + g_list_free_full(l, free_resv_region); + l = NULL; + + r[0] = alloc_resv_mem(0x0, 0x10000, 0x1FFFF); + l = resv_region_list_insert(l, r[0]); + /* insertion before the 1st item */ + r[1] = alloc_resv_mem(0x1, 0x0, 0xFF); + l = resv_region_list_insert(l, r[1]); + print_resv_region_list("test8", l, 2); + + /* collision on the left side */ + r[2] = alloc_resv_mem(0xA, 0x1200, 0x11FFF); + l = resv_region_list_insert(l, r[2]); + print_resv_region_list("test9", l, 3); + + /* collision on the right side */ + r[3] = alloc_resv_mem(0xA, 0x1F000, 0x2FFFF); + l = resv_region_list_insert(l, r[3]); + print_resv_region_list("test10", l, 4); + + /* override everything */ + r[4] = alloc_resv_mem(0xF, 0x0, UINT64_MAX); + l = resv_region_list_insert(l, r[4]); + print_resv_region_list("test11", l, 1); + + g_list_free_full(l, free_resv_region); + l = NULL; + + r[0] = alloc_resv_mem(0xF, 0x1000000000000, UINT64_MAX); + l = resv_region_list_insert(l, r[0]); + print_resv_region_list("test12", l, 1); + + r[1] = alloc_resv_mem(0xA, 0x0, 0xFFFFFFF); + l = resv_region_list_insert(l, r[1]); + print_resv_region_list("test12", l, 2); + + r[2] = alloc_resv_mem(0xB, 0x100000000, 0x1FFFFFFFF); + l = resv_region_list_insert(l, r[2]); + print_resv_region_list("test12", l, 3); + + r[3] = alloc_resv_mem(0x0, 0x010000000, 0x2FFFFFFFF); + l = resv_region_list_insert(l, r[3]); + print_resv_region_list("test12", l, 3); + + g_list_free_full(l, free_resv_region); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/resv-mem/range_reverse_array", + check_range_reverse_array); + g_test_add_func("/resv-mem/range_reverse_array_low_end", + check_range_reverse_array_low_end); + g_test_add_func("/resv-mem/resv_region_list_insert", + check_resv_region_list_insert); + + g_test_run(); + + return 0; +} diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c index aedc125ae9..739b91583c 100644 --- a/tests/unit/test-uuid.c +++ b/tests/unit/test-uuid.c @@ -145,7 +145,7 @@ static void test_uuid_unparse(void) int i; for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) { - char out[37]; + char out[UUID_STR_LEN]; if (!uuid_test_data[i].check_unparse) { continue; diff --git a/util/meson.build b/util/meson.build index 769b24f2e0..c3a24af5c5 100644 --- a/util/meson.build +++ b/util/meson.build @@ -52,6 +52,7 @@ util_ss.add(files('qdist.c')) util_ss.add(files('qht.c')) util_ss.add(files('qsp.c')) util_ss.add(files('range.c')) +util_ss.add(files('reserved-region.c')) util_ss.add(files('stats64.c')) util_ss.add(files('systemd.c')) util_ss.add(files('transactions.c')) diff --git a/util/range.c b/util/range.c index 098d9d2dc0..9605ccfcbe 100644 --- a/util/range.c +++ b/util/range.c @@ -20,11 +20,7 @@ #include "qemu/osdep.h" #include "qemu/range.h" -/* - * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. - * Both @a and @b must not be empty. - */ -static inline int range_compare(Range *a, Range *b) +int range_compare(Range *a, Range *b) { assert(!range_is_empty(a) && !range_is_empty(b)); @@ -70,3 +66,58 @@ GList *range_list_insert(GList *list, Range *data) return list; } + +static inline +GList *append_new_range(GList *list, uint64_t lob, uint64_t upb) +{ + Range *new = g_new0(Range, 1); + + range_set_bounds(new, lob, upb); + return g_list_append(list, new); +} + + +void range_inverse_array(GList *in, GList **rev, + uint64_t low, uint64_t high) +{ + Range *r, *rn; + GList *l = in, *out = *rev; + + for (l = in; l && range_upb(l->data) < low; l = l->next) { + continue; + } + + if (!l) { + out = append_new_range(out, low, high); + goto exit; + } + r = (Range *)l->data; + + /* first range lob is greater than min, insert a first range */ + if (range_lob(r) > low) { + out = append_new_range(out, low, MIN(range_lob(r) - 1, high)); + } + + /* insert a range inbetween each original range until we reach high */ + for (; l->next; l = l->next) { + r = (Range *)l->data; + rn = (Range *)l->next->data; + if (range_lob(r) >= high) { + goto exit; + } + if (range_compare(r, rn)) { + out = append_new_range(out, range_upb(r) + 1, + MIN(range_lob(rn) - 1, high)); + } + } + + /* last range */ + r = (Range *)l->data; + + /* last range upb is less than max, insert a last range */ + if (range_upb(r) < high) { + out = append_new_range(out, range_upb(r) + 1, high); + } +exit: + *rev = out; +} diff --git a/util/reserved-region.c b/util/reserved-region.c new file mode 100644 index 0000000000..18f83eb4c6 --- /dev/null +++ b/util/reserved-region.c @@ -0,0 +1,91 @@ +/* + * QEMU ReservedRegion helpers + * + * Copyright (c) 2023 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/range.h" +#include "qemu/reserved-region.h" + +GList *resv_region_list_insert(GList *list, ReservedRegion *reg) +{ + ReservedRegion *resv_iter, *new_reg; + Range *r = ®->range; + Range *range_iter; + GList *l; + + for (l = list; l ; ) { + resv_iter = (ReservedRegion *)l->data; + range_iter = &resv_iter->range; + + /* Skip all list elements strictly less than range to add */ + if (range_compare(range_iter, r) < 0) { + l = l->next; + } else if (range_compare(range_iter, r) > 0) { + return g_list_insert_before(list, l, reg); + } else { /* there is an overlap */ + if (range_contains_range(r, range_iter)) { + /* new range contains current item, simply remove this latter */ + GList *prev = l->prev; + g_free(l->data); + list = g_list_delete_link(list, l); + if (prev) { + l = prev->next; + } else { + l = list; + } + } else if (range_contains_range(range_iter, r)) { + /* new region is included in the current region */ + if (range_lob(range_iter) == range_lob(r)) { + /* adjacent on the left side, derives into 2 regions */ + range_set_bounds(range_iter, range_upb(r) + 1, + range_upb(range_iter)); + return g_list_insert_before(list, l, reg); + } else if (range_upb(range_iter) == range_upb(r)) { + /* adjacent on the right side, derives into 2 regions */ + range_set_bounds(range_iter, range_lob(range_iter), + range_lob(r) - 1); + l = l->next; + } else { + uint64_t lob = range_lob(range_iter); + /* + * the new range is in the middle of an existing one, + * split this latter into 3 regs instead + */ + range_set_bounds(range_iter, range_upb(r) + 1, + range_upb(range_iter)); + new_reg = g_new0(ReservedRegion, 1); + new_reg->type = resv_iter->type; + range_set_bounds(&new_reg->range, + lob, range_lob(r) - 1); + list = g_list_insert_before(list, l, new_reg); + return g_list_insert_before(list, l, reg); + } + } else if (range_lob(r) < range_lob(range_iter)) { + range_set_bounds(range_iter, range_upb(r) + 1, + range_upb(range_iter)); + return g_list_insert_before(list, l, reg); + } else { /* intersection on the upper range */ + range_set_bounds(range_iter, range_lob(range_iter), + range_lob(r) - 1); + l = l->next; + } + } /* overlap */ + } + return g_list_append(list, reg); +} + diff --git a/util/uuid.c b/util/uuid.c index d71aa79e5e..234619dd5e 100644 --- a/util/uuid.c +++ b/util/uuid.c @@ -51,7 +51,7 @@ int qemu_uuid_is_equal(const QemuUUID *lhv, const QemuUUID *rhv) void qemu_uuid_unparse(const QemuUUID *uuid, char *out) { const unsigned char *uu = &uuid->data[0]; - snprintf(out, UUID_FMT_LEN + 1, UUID_FMT, + snprintf(out, UUID_STR_LEN, UUID_FMT, uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7], uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); } |