vfio/pci: Add NVIDIA GPUDirect Cliques support

NVIDIA has defined a specification for creating GPUDirect "cliques", where devices with the same clique ID support direct peer-to-peer DMA. When running on bare-metal, tools like NVIDIA's p2pBandwidthLatencyTest (part of cuda-samples) determine which GPUs can support peer-to-peer based on chipset and topology. When running in a VM, these tools have no visibility to the physical hardware support or topology. This option allows the user to specify hints via a vendor defined capability. For instance: <qemu:commandline> <qemu:arg value='-set'/> <qemu:arg value='device.hostdev0.x-nv-gpudirect-clique=0'/> <qemu:arg value='-set'/> <qemu:arg value='device.hostdev1.x-nv-gpudirect-clique=1'/> <qemu:arg value='-set'/> <qemu:arg value='device.hostdev2.x-nv-gpudirect-clique=1'/> </qemu:commandline> This enables two cliques. The first is a singleton clique with ID 0, for the first hostdev defined in the XML (note that since cliques define peer-to-peer sets, singleton clique offer no benefit). The subsequent two hostdevs are both added to clique ID 1, indicating peer-to-peer is possible between these devices. QEMU only provides validation that the clique ID is valid and applied to an NVIDIA graphics device, any validation that the resulting cliques are functional and valid is the user's responsibility. The NVIDIA specification allows a 4-bit clique ID, thus valid values are 0-15. Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
author: Alex Williamson <alex.williamson@redhat.com> 2017-08-29 16:05:47 -0600
committer: Alex Williamson <alex.williamson@redhat.com> 2017-10-03 12:57:36 -0600
commit: dfbee78db8fdf7bc8c151c3d29504bb47438480b (patch)
tree: 542b918bbbed80463a1d721075aa310c0170c7c5 /hw/vfio/pci-quirks.c
parent: e3f79f3bd4582b673a3a447edfe5211188741072 (diff)
1 files changed, 110 insertions, 0 deletions
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 40aaae76fe..14291c2a16 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -14,6 +14,7 @@
 #include "qemu/error-report.h"
 #include "qemu/range.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
@@ -1850,7 +1851,116 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
         break;
     }
 }
+
+/*
+ * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
+ * devices as a member of a clique.  Devices within the same clique ID
+ * are capable of direct P2P.  It's the user's responsibility that this
+ * is correct.  The spec says that this may reside at any unused config
+ * offset, but reserves and recommends hypervisors place this at C8h.
+ * The spec also states that the hypervisor should place this capability
+ * at the end of the capability list, thus next is defined as 0h.
+ *
+ * +----------------+----------------+----------------+----------------+
+ * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
+ * +----------------+----------------+----------------+----------------+
+ * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
+ * +---------------------------------+---------------------------------+
+ *
+ * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
+ */
+static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
+
+    visit_type_uint8(v, name, ptr, errp);
+}
+
+static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
+    Error *local_err = NULL;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_uint8(v, name, &value, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (value & ~0xF) {
+        error_setg(errp, "Property %s: valid range 0-15", name);
+        return;
+    }
+
+    *ptr = value;
+}
+
+const PropertyInfo qdev_prop_nv_gpudirect_clique = {
+    .name = "uint4",
+    .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
+    .get = get_nv_gpudirect_clique_id,
+    .set = set_nv_gpudirect_clique_id,
+};
+
+static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int ret, pos = 0xC8;
+
+    if (vdev->nv_gpudirect_clique == 0xFF) {
+        return 0;
+    }
+
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
+        error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
+        return -EINVAL;
+    }
+
+    if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
+        PCI_BASE_CLASS_DISPLAY) {
+        error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
+        return -EINVAL;
+    }
+
+    ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
+    if (ret < 0) {
+        error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
+        return ret;
+    }
+
+    memset(vdev->emulated_config_bits + pos, 0xFF, 8);
+    pos += PCI_CAP_FLAGS;
+    pci_set_byte(pdev->config + pos++, 8);
+    pci_set_byte(pdev->config + pos++, 'P');
+    pci_set_byte(pdev->config + pos++, '2');
+    pci_set_byte(pdev->config + pos++, 'P');
+    pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
+    pci_set_byte(pdev->config + pos, 0);
+
+    return 0;
+}
+
 int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
 {
+    int ret;
+
+    ret = vfio_add_nv_gpudirect_cap(vdev, errp);
+    if (ret) {
+        return ret;
+    }
+
     return 0;
 }
author	Alex Williamson <alex.williamson@redhat.com>	2017-08-29 16:05:47 -0600
committer	Alex Williamson <alex.williamson@redhat.com>	2017-10-03 12:57:36 -0600
commit	dfbee78db8fdf7bc8c151c3d29504bb47438480b (patch)
tree	542b918bbbed80463a1d721075aa310c0170c7c5 /hw/vfio/pci-quirks.c
parent	e3f79f3bd4582b673a3a447edfe5211188741072 (diff)