aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/vfio/pci.c49
-rw-r--r--hw/vfio/trace-events1
2 files changed, 40 insertions, 10 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a171056b41..f2c679e47c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1772,6 +1772,12 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
uint8_t cap_ver;
uint8_t *config;
+ /* Only add extended caps if we have them and the guest can see them */
+ if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
+ !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+ return 0;
+ }
+
/*
* pcie_add_capability always inserts the new capability at the tail
* of the chain. Therefore to end up with a chain that matches the
@@ -1780,6 +1786,25 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
*/
config = g_memdup(pdev->config, vdev->config_size);
+ /*
+ * Extended capabilities are chained with each pointing to the next, so we
+ * can drop anything other than the head of the chain simply by modifying
+ * the previous next pointer. For the head of the chain, we can modify the
+ * capability ID to something that cannot match a valid capability. ID
+ * 0 is reserved for this since absence of capabilities is indicated by
+ * 0 for the ID, version, AND next pointer. However, pcie_add_capability()
+ * uses ID 0 as reserved for list management and will incorrectly match and
+ * assert if we attempt to pre-load the head of the chain with with this
+ * ID. Use ID 0xFFFF temporarily since it is also seems to be reserved in
+ * part for identifying absence of capabilities in a root complex register
+ * block. If the ID still exists after adding capabilities, switch back to
+ * zero. We'll mark this entire first dword as emulated for this purpose.
+ */
+ pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
+ PCI_EXT_CAP(0xFFFF, 0, 0));
+ pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
+ pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0);
+
for (next = PCI_CONFIG_SPACE_SIZE; next;
next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
header = pci_get_long(config + next);
@@ -1794,12 +1819,23 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
*/
size = vfio_ext_cap_max_size(config, next);
- pcie_add_capability(pdev, cap_id, cap_ver, next, size);
- pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
-
/* Use emulated next pointer to allow dropping extended caps */
pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
PCI_EXT_CAP_NEXT_MASK);
+
+ switch (cap_id) {
+ case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
+ trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
+ break;
+ default:
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ }
+
+ }
+
+ /* Cleanup chain head ID if necessary */
+ if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
+ pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
}
g_free(config);
@@ -1821,13 +1857,6 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev)
return ret;
}
- /* on PCI bus, it doesn't make sense to expose extended capabilities. */
- if (!pci_is_express(pdev) ||
- !pci_bus_is_express(pdev->bus) ||
- !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
- return 0;
- }
-
return vfio_add_ext_cap(vdev);
}
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 9da0ff928b..a768fb54ec 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -37,6 +37,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
vfio_initfn(const char *name, int group_id) " (%s) group %d"
+vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s %x@%x"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
vfio_pci_reset_pm(const char *name) "%s PCI PM Reset"