aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/misc/vfio.c133
1 files changed, 128 insertions, 5 deletions
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index f7f8a19ee8..9aecaa82bc 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -52,6 +52,8 @@
/* Extra debugging, trap acceleration paths for more logging */
#define VFIO_ALLOW_MMAP 1
#define VFIO_ALLOW_KVM_INTX 1
+#define VFIO_ALLOW_KVM_MSI 1
+#define VFIO_ALLOW_KVM_MSIX 1
struct VFIODevice;
@@ -208,6 +210,17 @@ static QLIST_HEAD(, VFIOContainer)
static QLIST_HEAD(, VFIOGroup)
group_list = QLIST_HEAD_INITIALIZER(group_list);
+#ifdef CONFIG_KVM
+/*
+ * We have a single VFIO pseudo device per KVM VM. Once created it lives
+ * for the life of the VM. Closing the file descriptor only drops our
+ * reference to it and the device's reference to kvm. Therefore once
+ * initialized, this file descriptor is only released on QEMU exit and
+ * we'll re-use it should another vfio device be attached before then.
+ */
+static int vfio_kvm_device_fd = -1;
+#endif
+
static void vfio_disable_interrupts(VFIODevice *vdev);
static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
@@ -579,9 +592,21 @@ static void vfio_msi_interrupt(void *opaque)
return;
}
- DPRINTF("%s(%04x:%02x:%02x.%x) vector %d\n", __func__,
+#ifdef VFIO_DEBUG
+ MSIMessage msg;
+
+ if (vdev->interrupt == VFIO_INT_MSIX) {
+ msg = msi_get_message(&vdev->pdev, nr);
+ } else if (vdev->interrupt == VFIO_INT_MSI) {
+ msg = msix_get_message(&vdev->pdev, nr);
+ } else {
+ abort();
+ }
+
+ DPRINTF("%s(%04x:%02x:%02x.%x) vector %d 0x%"PRIx64"/0x%x\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function, nr);
+ vdev->host.function, nr, msg.address, msg.data);
+#endif
if (vdev->interrupt == VFIO_INT_MSIX) {
msix_notify(&vdev->pdev, nr);
@@ -649,7 +674,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
+ vector->virq = msg && VFIO_ALLOW_KVM_MSIX ?
+ kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
if (vector->virq < 0 ||
kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
NULL, vector->virq) < 0) {
@@ -816,7 +842,8 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = kvm_irqchip_add_msi_route(kvm_state, vector->msg);
+ vector->virq = VFIO_ALLOW_KVM_MSI ?
+ kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1;
if (vector->virq < 0 ||
kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
NULL, vector->virq) < 0) {
@@ -878,8 +905,20 @@ static void vfio_disable_msi_common(VFIODevice *vdev)
static void vfio_disable_msix(VFIODevice *vdev)
{
+ int i;
+
msix_unset_vector_notifiers(&vdev->pdev);
+ /*
+ * MSI-X will only release vectors if MSI-X is still enabled on the
+ * device, check through the rest and release it ourselves if necessary.
+ */
+ for (i = 0; i < vdev->nr_vectors; i++) {
+ if (vdev->msi_vectors[i].use) {
+ vfio_msix_vector_release(&vdev->pdev, i);
+ }
+ }
+
if (vdev->nr_vectors) {
vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
}
@@ -1800,6 +1839,34 @@ static void vfio_probe_nvidia_bar5_window_quirk(VFIODevice *vdev, int nr)
vdev->host.function);
}
+static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOQuirk *quirk = opaque;
+ VFIODevice *vdev = quirk->vdev;
+ PCIDevice *pdev = &vdev->pdev;
+ hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
+
+ vfio_generic_quirk_write(opaque, addr, data, size);
+
+ /*
+ * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
+ * MSI capability ID register. Both the ID and next register are
+ * read-only, so we allow writes covering either of those to real hw.
+ * NB - only fixed for the 0x88000 MMIO window.
+ */
+ if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
+ vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
+ vfio_bar_write(&vdev->bars[quirk->data.bar], addr + base, data, size);
+ }
+}
+
+static const MemoryRegionOps vfio_nvidia_88000_quirk = {
+ .read = vfio_generic_quirk_read,
+ .write = vfio_nvidia_88000_quirk_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
/*
* Finally, BAR0 itself. We want to redirect any accesses to either
* 0x1800 or 0x88000 through the PCI config space access functions.
@@ -1826,7 +1893,7 @@ static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr)
quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
quirk->data.bar = nr;
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk,
+ memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
quirk, "vfio-nvidia-bar0-88000-quirk",
TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
@@ -3041,6 +3108,59 @@ static void vfio_pci_reset_handler(void *opaque)
}
}
+static void vfio_kvm_device_add_group(VFIOGroup *group)
+{
+#ifdef CONFIG_KVM
+ struct kvm_device_attr attr = {
+ .group = KVM_DEV_VFIO_GROUP,
+ .attr = KVM_DEV_VFIO_GROUP_ADD,
+ .addr = (uint64_t)(unsigned long)&group->fd,
+ };
+
+ if (!kvm_enabled()) {
+ return;
+ }
+
+ if (vfio_kvm_device_fd < 0) {
+ struct kvm_create_device cd = {
+ .type = KVM_DEV_TYPE_VFIO,
+ };
+
+ if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
+ DPRINTF("KVM_CREATE_DEVICE: %m\n");
+ return;
+ }
+
+ vfio_kvm_device_fd = cd.fd;
+ }
+
+ if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+ error_report("Failed to add group %d to KVM VFIO device: %m",
+ group->groupid);
+ }
+#endif
+}
+
+static void vfio_kvm_device_del_group(VFIOGroup *group)
+{
+#ifdef CONFIG_KVM
+ struct kvm_device_attr attr = {
+ .group = KVM_DEV_VFIO_GROUP,
+ .attr = KVM_DEV_VFIO_GROUP_DEL,
+ .addr = (uint64_t)(unsigned long)&group->fd,
+ };
+
+ if (vfio_kvm_device_fd < 0) {
+ return;
+ }
+
+ if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+ error_report("Failed to remove group %d to KVM VFIO device: %m",
+ group->groupid);
+ }
+#endif
+}
+
static int vfio_connect_container(VFIOGroup *group)
{
VFIOContainer *container;
@@ -3189,6 +3309,8 @@ static VFIOGroup *vfio_get_group(int groupid)
QLIST_INSERT_HEAD(&group_list, group, next);
+ vfio_kvm_device_add_group(group);
+
return group;
}
@@ -3198,6 +3320,7 @@ static void vfio_put_group(VFIOGroup *group)
return;
}
+ vfio_kvm_device_del_group(group);
vfio_disconnect_container(group);
QLIST_REMOVE(group, next);
DPRINTF("vfio_put_group: close group->fd\n");