aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/vfio/pci-quirks.c6
-rw-r--r--hw/vfio/pci.c39
-rw-r--r--hw/vfio/pci.h1
-rw-r--r--trace-events2
4 files changed, 45 insertions, 3 deletions
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 30c68a1e2b..e117c41fbe 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -328,7 +328,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
window->data_offset = 4;
window->nr_matches = 1;
window->matches[0].match = 0x4000;
- window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1;
+ window->matches[0].mask = vdev->config_size - 1;
window->bar = nr;
window->addr_mem = &quirk->mem[0];
window->data_mem = &quirk->mem[1];
@@ -674,7 +674,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
window->matches[0].match = 0x1800;
window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
window->matches[1].match = 0x88000;
- window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1;
+ window->matches[1].mask = vdev->config_size - 1;
window->bar = nr;
window->addr_mem = bar5->addr_mem = &quirk->mem[0];
window->data_mem = bar5->data_mem = &quirk->mem[1];
@@ -765,7 +765,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(mirror->mem, OBJECT(vdev),
&vfio_nvidia_mirror_quirk, mirror,
"vfio-nvidia-bar0-88000-mirror-quirk",
- PCIE_CONFIG_SPACE_SIZE);
+ vdev->config_size);
memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
mirror->offset, mirror->mem, 1);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1fb868c244..e66c47ff6a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -356,6 +356,13 @@ static void vfio_msi_interrupt(void *opaque)
if (vdev->interrupt == VFIO_INT_MSIX) {
get_msg = msix_get_message;
notify = msix_notify;
+
+ /* A masked vector firing needs to use the PBA, enable it */
+ if (msix_is_masked(&vdev->pdev, nr)) {
+ set_bit(nr, vdev->msix->pending);
+ memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true);
+ trace_vfio_msix_pba_enable(vdev->vbasedev.name);
+ }
} else if (vdev->interrupt == VFIO_INT_MSI) {
get_msg = msi_get_message;
notify = msi_notify;
@@ -535,6 +542,14 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
}
}
+ /* Disable PBA emulation when nothing more is pending. */
+ clear_bit(nr, vdev->msix->pending);
+ if (find_first_bit(vdev->msix->pending,
+ vdev->nr_vectors) == vdev->nr_vectors) {
+ memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);
+ trace_vfio_msix_pba_disable(vdev->vbasedev.name);
+ }
+
return 0;
}
@@ -738,6 +753,9 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
vfio_msi_disable_common(vdev);
+ memset(vdev->msix->pending, 0,
+ BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long));
+
trace_vfio_msix_disable(vdev->vbasedev.name);
}
@@ -1251,6 +1269,8 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
{
int ret;
+ vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
+ sizeof(unsigned long));
ret = msix_init(&vdev->pdev, vdev->msix->entries,
&vdev->bars[vdev->msix->table_bar].region.mem,
vdev->msix->table_bar, vdev->msix->table_offset,
@@ -1264,6 +1284,24 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
return ret;
}
+ /*
+ * The PCI spec suggests that devices provide additional alignment for
+ * MSI-X structures and avoid overlapping non-MSI-X related registers.
+ * For an assigned device, this hopefully means that emulation of MSI-X
+ * structures does not affect the performance of the device. If devices
+ * fail to provide that alignment, a significant performance penalty may
+ * result, for instance Mellanox MT27500 VFs:
+ * http://www.spinics.net/lists/kvm/msg125881.html
+ *
+ * The PBA is simply not that important for such a serious regression and
+ * most drivers do not appear to look at it. The solution for this is to
+ * disable the PBA MemoryRegion unless it's being used. We disable it
+ * here and only enable it if a masked vector fires through QEMU. When
+ * the vector-use notifier is called, which occurs on unmask, we test
+ * whether PBA emulation is still needed and disable it again if not.
+ */
+ memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);
+
return 0;
}
@@ -1275,6 +1313,7 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
msix_uninit(&vdev->pdev,
&vdev->bars[vdev->msix->table_bar].region.mem,
&vdev->bars[vdev->msix->pba_bar].region.mem);
+ g_free(vdev->msix->pending);
}
}
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index f004d52b69..62565878fc 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -95,6 +95,7 @@ typedef struct VFIOMSIXInfo {
uint32_t pba_offset;
MemoryRegion mmap_mem;
void *mmap;
+ unsigned long *pending;
} VFIOMSIXInfo;
typedef struct VFIOPCIDevice {
diff --git a/trace-events b/trace-events
index 934a7b6402..c9ac144cee 100644
--- a/trace-events
+++ b/trace-events
@@ -1631,6 +1631,8 @@ vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s)
vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used"
vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released"
vfio_msix_enable(const char *name) " (%s)"
+vfio_msix_pba_disable(const char *name) " (%s)"
+vfio_msix_pba_enable(const char *name) " (%s)"
vfio_msix_disable(const char *name) " (%s)"
vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
vfio_msi_disable(const char *name) " (%s)"