diff options
-rw-r--r-- | hw/vfio/pci-quirks.c | 6 | ||||
-rw-r--r-- | hw/vfio/pci.c | 39 | ||||
-rw-r--r-- | hw/vfio/pci.h | 1 | ||||
-rw-r--r-- | trace-events | 2 |
4 files changed, 45 insertions, 3 deletions
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 30c68a1e2b..e117c41fbe 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -328,7 +328,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) window->data_offset = 4; window->nr_matches = 1; window->matches[0].match = 0x4000; - window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1; + window->matches[0].mask = vdev->config_size - 1; window->bar = nr; window->addr_mem = &quirk->mem[0]; window->data_mem = &quirk->mem[1]; @@ -674,7 +674,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) window->matches[0].match = 0x1800; window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1; window->matches[1].match = 0x88000; - window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1; + window->matches[1].mask = vdev->config_size - 1; window->bar = nr; window->addr_mem = bar5->addr_mem = &quirk->mem[0]; window->data_mem = bar5->data_mem = &quirk->mem[1]; @@ -765,7 +765,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(mirror->mem, OBJECT(vdev), &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-88000-mirror-quirk", - PCIE_CONFIG_SPACE_SIZE); + vdev->config_size); memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 1fb868c244..e66c47ff6a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -356,6 +356,13 @@ static void vfio_msi_interrupt(void *opaque) if (vdev->interrupt == VFIO_INT_MSIX) { get_msg = msix_get_message; notify = msix_notify; + + /* A masked vector firing needs to use the PBA, enable it */ + if (msix_is_masked(&vdev->pdev, nr)) { + set_bit(nr, vdev->msix->pending); + memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true); + trace_vfio_msix_pba_enable(vdev->vbasedev.name); + } } else if (vdev->interrupt == VFIO_INT_MSI) { get_msg = msi_get_message; notify = msi_notify; @@ -535,6 +542,14 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, } } + /* Disable PBA emulation when nothing more is pending. */ + clear_bit(nr, vdev->msix->pending); + if (find_first_bit(vdev->msix->pending, + vdev->nr_vectors) == vdev->nr_vectors) { + memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); + trace_vfio_msix_pba_disable(vdev->vbasedev.name); + } + return 0; } @@ -738,6 +753,9 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) vfio_msi_disable_common(vdev); + memset(vdev->msix->pending, 0, + BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); + trace_vfio_msix_disable(vdev->vbasedev.name); } @@ -1251,6 +1269,8 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) { int ret; + vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * + sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, &vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, @@ -1264,6 +1284,24 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) return ret; } + /* + * The PCI spec suggests that devices provide additional alignment for + * MSI-X structures and avoid overlapping non-MSI-X related registers. + * For an assigned device, this hopefully means that emulation of MSI-X + * structures does not affect the performance of the device. If devices + * fail to provide that alignment, a significant performance penalty may + * result, for instance Mellanox MT27500 VFs: + * http://www.spinics.net/lists/kvm/msg125881.html + * + * The PBA is simply not that important for such a serious regression and + * most drivers do not appear to look at it. The solution for this is to + * disable the PBA MemoryRegion unless it's being used. We disable it + * here and only enable it if a masked vector fires through QEMU. As the + * vector-use notifier is called, which occurs on unmask, we test whether + * PBA emulation is needed and again disable if not. + */ + memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); + return 0; } @@ -1275,6 +1313,7 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].region.mem, &vdev->bars[vdev->msix->pba_bar].region.mem); + g_free(vdev->msix->pending); } } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index f004d52b69..62565878fc 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -95,6 +95,7 @@ typedef struct VFIOMSIXInfo { uint32_t pba_offset; MemoryRegion mmap_mem; void *mmap; + unsigned long *pending; } VFIOMSIXInfo; typedef struct VFIOPCIDevice { diff --git a/trace-events b/trace-events index 934a7b6402..c9ac144cee 100644 --- a/trace-events +++ b/trace-events @@ -1631,6 +1631,8 @@ vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used" vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released" vfio_msix_enable(const char *name) " (%s)" +vfio_msix_pba_disable(const char *name) " (%s)" +vfio_msix_pba_enable(const char *name) " (%s)" vfio_msix_disable(const char *name) " (%s)" vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" vfio_msi_disable(const char *name) " (%s)" |