From ef536007c3301bbd6a787e4c2210ea289adaa6f0 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 28 Oct 2022 15:47:57 -0400 Subject: s390x/pci: coalesce unmap operations Currently, each unmapped page is handled as an individual iommu region notification. Attempt to group contiguous unmap operations into fewer notifications to reduce overhead. Signed-off-by: Matthew Rosato Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> Reviewed-by: Eric Farman Signed-off-by: Thomas Huth --- hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'hw') diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 7cc4bcf850..66e764f901 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, } g_hash_table_remove(iommu->iotlb, &entry->iova); inc_dma_avail(iommu); + /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ + goto out; } else { if (cache) { if (cache->perm == entry->perm && @@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, dec_dma_avail(iommu); } + /* + * All associated iotlb entries have already been cleared, trigger the + * unmaps. + */ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); out: return iommu->dma_limit ? iommu->dma_limit->avail : 1; } +static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, + uint64_t len) +{ + uint64_t remain = len, start = iova, end = start + len - 1, mask, size; + IOMMUTLBEvent event = { + .type = IOMMU_NOTIFIER_UNMAP, + .entry = { + .target_as = &address_space_memory, + .translated_addr = 0, + .perm = IOMMU_NONE, + }, + }; + + while (remain >= TARGET_PAGE_SIZE) { + mask = dma_aligned_pow2_mask(start, end, 64); + size = mask + 1; + event.entry.iova = start; + event.entry.addr_mask = mask; + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + start += size; + remain -= size; + } +} + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) { CPUS390XState *env = &cpu->env; + uint64_t iova, coalesce = 0; uint32_t fh; uint16_t error = 0; S390PCIBusDevice *pbdev; @@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) break; } + /* + * If this is an unmap of a PTE, let's try to coalesce multiple unmaps + * into as few notifier events as possible. + */ + if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { + if (coalesce == 0) { + iova = entry.iova; + } + coalesce += entry.len; + } else if (coalesce > 0) { + /* Unleash the coalesced unmap before processing a new map */ + s390_pci_batch_unmap(iommu, iova, coalesce); + coalesce = 0; + } + start += entry.len; while (entry.iova < start && entry.iova < end) { if (dma_avail > 0 || entry.perm == IOMMU_NONE) { @@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) } } } + if (coalesce) { + /* Unleash the coalesced unmap before finishing rpcit */ + s390_pci_batch_unmap(iommu, iova, coalesce); + coalesce = 0; + } if (again && dma_avail > 0) goto retry; err: -- cgit v1.2.3 From df202e3ff3fccb49868e08f20d0bda86cb953fbe Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 28 Oct 2022 15:47:58 -0400 Subject: s390x/pci: shrink DMA aperture to be bound by vfio DMA limit Currently, s390x-pci performs accounting against the vfio DMA limit and triggers the guest to clean up mappings when the limit is reached. Let's go a step further and also limit the size of the supported DMA aperture reported to the guest based upon the initial vfio DMA limit reported for the container (if less than than the size reported by the firmware/host zPCI layer). This avoids processing sections of the guest DMA table during global refresh that, for common use cases, will never be used anway, and makes exhausting the vfio DMA limit due to mismatch between guest aperture size and host limit far less likely and more indicitive of an error. Signed-off-by: Matthew Rosato Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> Reviewed-by: Eric Farman Signed-off-by: Thomas Huth --- hw/s390x/s390-pci-vfio.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'hw') diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c index 5f0adb0b4a..f7bf36cec8 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, cnt->users = 1; cnt->avail = avail; QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); + pbdev->iommu->max_dma_limit = avail; return cnt; } @@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_base *cap; VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + uint64_t vfio_size; hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); @@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, /* The following values remain 0 until we support other FMB formats */ pbdev->zpci_fn.fmbl = 0; pbdev->zpci_fn.pft = 0; + + /* + * If appropriate, reduce the size of the supported DMA aperture reported + * to the guest based upon the vfio DMA limit. + */ + vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; + if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { + pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; + } } static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, -- cgit v1.2.3 From 03451953c79e6b31f7860ee0c35b28e181d573c1 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 9 Dec 2022 14:57:00 -0500 Subject: s390x/pci: reset ISM passthrough devices on shutdown and system reset ISM device firmware stores unique state information that can can cause a wholesale unmap of the associated IOMMU (e.g. when we get a termination signal for QEMU) to trigger firmware errors because firmware believes we are attempting to invalidate entries that are still in-use by the guest OS (when in fact that guest is in the process of being terminated or rebooted). To alleviate this, register both a shutdown notifier (for unexpected termination cases e.g. virsh destroy) as well as a reset callback (for cases like guest OS reboot). For each of these scenarios, trigger PCI device reset; this is enough to indicate to firmware that the IOMMU is no longer in-use by the guest OS, making it safe to invalidate any associated IOMMU entries. Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") Signed-off-by: Matthew Rosato Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> Reviewed-by: Eric Farman [thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] Signed-off-by: Thomas Huth --- hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ hw/s390x/s390-pci-vfio.c | 2 ++ 2 files changed, 30 insertions(+) (limited to 'hw') diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 977e7daa15..02751f3597 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -24,6 +24,8 @@ #include "hw/pci/msi.h" #include "qemu/error-report.h" #include "qemu/module.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" #ifndef DEBUG_S390PCI_BUS #define DEBUG_S390PCI_BUS 0 @@ -150,10 +152,30 @@ out: psccb->header.response_code = cpu_to_be16(rc); } +static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) +{ + S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, + shutdown_notifier); + + pci_device_reset(pbdev->pdev); +} + +static void s390_pci_reset_cb(void *opaque) +{ + S390PCIBusDevice *pbdev = opaque; + + pci_device_reset(pbdev->pdev); +} + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) { HotplugHandler *hotplug_ctrl; + if (pbdev->pft == ZPCI_PFT_ISM) { + notifier_remove(&pbdev->shutdown_notifier); + qemu_unregister_reset(s390_pci_reset_cb, pbdev); + } + /* Unplug the PCI device */ if (pbdev->pdev) { DeviceState *pdev = DEVICE(pbdev->pdev); @@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, pbdev->fh |= FH_SHM_VFIO; pbdev->forwarding_assist = false; } + /* Register shutdown notifier and reset callback for ISM devices */ + if (pbdev->pft == ZPCI_PFT_ISM) { + pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; + qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); + qemu_register_reset(s390_pci_reset_cb, pbdev); + } } else { pbdev->fh |= FH_SHM_EMUL; /* Always intercept emulated devices */ diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c index f7bf36cec8..f51190d466 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, /* The following values remain 0 until we support other FMB formats */ pbdev->zpci_fn.fmbl = 0; pbdev->zpci_fn.pft = 0; + /* Store function type separately for type-specific behavior */ + pbdev->pft = cap->pft; /* * If appropriate, reduce the size of the supported DMA aperture reported -- cgit v1.2.3 From c57e0ea6b4ed0d3ebd4fa8d0210da002b6a0dfbc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 13 Dec 2022 10:46:54 +0100 Subject: hw: Include the VMWare devices only in the x86 targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems a little bit weird that the para-virtualized x86 VMWare devices "vmware-svga" and "vmxnet3" also show up in non-x86 targets. They are likely pretty useless there (since the guest OSes likely do not have any drivers for those enabled), so let's change this and only enable those devices by default for the classical x86 targets. Message-Id: <20221213095144.42355-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth --- hw/display/Kconfig | 2 +- hw/net/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/display/Kconfig b/hw/display/Kconfig index a1b159becd..7b3da68d1c 100644 --- a/hw/display/Kconfig +++ b/hw/display/Kconfig @@ -55,7 +55,7 @@ config VGA_MMIO config VMWARE_VGA bool - default y if PCI_DEVICES + default y if PCI_DEVICES && PC_PCI depends on PCI select VGA diff --git a/hw/net/Kconfig b/hw/net/Kconfig index 6d795ec752..1cc1c5775e 100644 --- a/hw/net/Kconfig +++ b/hw/net/Kconfig @@ -51,7 +51,7 @@ config RTL8139_PCI config VMXNET3_PCI bool - default y if PCI_DEVICES + default y if PCI_DEVICES && PC_PCI depends on PCI config SMC91C111 -- cgit v1.2.3