aboutsummaryrefslogtreecommitdiff
path: root/hw/vfio/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r--hw/vfio/pci.c528
1 files changed, 270 insertions, 258 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 20b505f4ec..d091d8cf0e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
{
- struct vfio_region_info reg_info = {
- .argsz = sizeof(reg_info),
- .index = VFIO_PCI_ROM_REGION_INDEX
- };
+ struct vfio_region_info *reg_info;
uint64_t size;
off_t off = 0;
ssize_t bytes;
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+ if (vfio_get_region_info(&vdev->vbasedev,
+ VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
error_report("vfio: Error getting ROM info: %m");
return;
}
- trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
+ trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
+ (unsigned long)reg_info->offset,
+ (unsigned long)reg_info->flags);
+
+ vdev->rom_size = size = reg_info->size;
+ vdev->rom_offset = reg_info->offset;
- vdev->rom_size = size = reg_info.size;
- vdev->rom_offset = reg_info.offset;
+ g_free(reg_info);
if (!vdev->rom_size) {
vdev->rom_read_failed = true;
@@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
break;
}
}
+
+ /*
+ * Test the ROM signature against our device, if the vendor is correct
+ * but the device ID doesn't match, store the correct device ID and
+ * recompute the checksum. Intel IGD devices need this and are known
+ * to have bogus checksums so we can't simply adjust the checksum.
+ */
+ if (pci_get_word(vdev->rom) == 0xaa55 &&
+ pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size &&
+ !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) {
+ uint16_t vid, did;
+
+ vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4);
+ did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6);
+
+ if (vid == vdev->vendor_id && did != vdev->device_id) {
+ int i;
+ uint8_t csum, *data = vdev->rom;
+
+ pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6,
+ vdev->device_id);
+ data[6] = 0;
+
+ for (csum = 0, i = 0; i < vdev->rom_size; i++) {
+ csum += data[i];
+ }
+
+ data[6] = -csum;
+ }
+ }
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -889,18 +919,14 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
DeviceState *dev = DEVICE(vdev);
- char name[32];
+ char *name;
int fd = vdev->vbasedev.fd;
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified romfile\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
+ vdev->vbasedev.name);
}
return;
}
@@ -913,9 +939,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
pwrite(fd, &size, 4, offset) != 4 ||
pread(fd, &size, 4, offset) != 4 ||
pwrite(fd, &orig, 4, offset) != 4) {
- error_report("%s(%04x:%02x:%02x.%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
return;
}
@@ -927,32 +951,22 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified non zero value for "
- "rombar\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
+ vdev->vbasedev.name);
} else {
- error_printf("Warning : Rom loading for device at "
- "%04x:%02x:%02x.%x has been disabled due to "
- "system instability issues. "
- "Specify rombar=1 or romfile to force\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
+ vdev->vbasedev.name);
return;
}
}
trace_vfio_pci_size_rom(vdev->vbasedev.name, size);
- snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name);
memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),
&vfio_rom_ops, vdev, name, size);
+ g_free(name);
pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
@@ -1063,9 +1077,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
ret = pread(vdev->vbasedev.fd, &phys_val, len,
vdev->config_offset + addr);
if (ret != len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, len);
+ error_report("%s(%s, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, len);
return -errno;
}
phys_val = le32_to_cpu(phys_val);
@@ -1089,9 +1102,8 @@ void vfio_pci_write_config(PCIDevice *pdev,
/* Write everything to VFIO, let it filter out what we can't write */
if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
!= len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, val, len);
+ error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, val, len);
}
/* MSI/MSI-X Enabling/Disabling */
@@ -1185,6 +1197,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
return 0;
}
+static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
+{
+ off_t start, end;
+ VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
+
+ /*
+ * We expect to find a single mmap covering the whole BAR, anything else
+ * means it's either unsupported or already setup.
+ */
+ if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
+ region->size != region->mmaps[0].size) {
+ return;
+ }
+
+ /* MSI-X table start and end aligned to host page size */
+ start = vdev->msix->table_offset & qemu_real_host_page_mask;
+ end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
+ (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+
+ /*
+ * Does the MSI-X table cover the beginning of the BAR? The whole BAR?
+ * NB - Host page size is necessarily a power of two and so is the PCI
+ * BAR (not counting EA yet), therefore if we have host page aligned
+ * @start and @end, then any remainder of the BAR before or after those
+ * must be at least host page sized and therefore mmap'able.
+ */
+ if (!start) {
+ if (end >= region->size) {
+ region->nr_mmaps = 0;
+ g_free(region->mmaps);
+ region->mmaps = NULL;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, 0, 0);
+ } else {
+ region->mmaps[0].offset = end;
+ region->mmaps[0].size = region->size - end;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+ }
+
+ /* Maybe it's aligned at the end of the BAR */
+ } else if (end >= region->size) {
+ region->mmaps[0].size = start;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+
+ /* Otherwise it must split the BAR */
+ } else {
+ region->nr_mmaps = 2;
+ region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);
+
+ memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));
+
+ region->mmaps[0].size = start;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+
+ region->mmaps[1].offset = end;
+ region->mmaps[1].size = region->size - end;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[1].offset,
+ region->mmaps[1].offset + region->mmaps[1].size);
+ }
+}
+
/*
* We don't have any control over how pci_add_capability() inserts
* capabilities into the chain. In order to setup MSI-X we need a
@@ -1259,6 +1339,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev)
msix->table_offset, msix->entries);
vdev->msix = msix;
+ vfio_pci_fixup_msix_region(vdev);
+
return 0;
}
@@ -1269,9 +1351,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
sizeof(unsigned long));
ret = msix_init(&vdev->pdev, vdev->msix->entries,
- &vdev->bars[vdev->msix->table_bar].region.mem,
+ vdev->bars[vdev->msix->table_bar].region.mem,
vdev->msix->table_bar, vdev->msix->table_offset,
- &vdev->bars[vdev->msix->pba_bar].region.mem,
+ vdev->bars[vdev->msix->pba_bar].region.mem,
vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
if (ret < 0) {
if (ret == -ENOTSUP) {
@@ -1308,8 +1390,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
if (vdev->msix) {
msix_uninit(&vdev->pdev,
- &vdev->bars[vdev->msix->table_bar].region.mem,
- &vdev->bars[vdev->msix->pba_bar].region.mem);
+ vdev->bars[vdev->msix->table_bar].region.mem,
+ vdev->bars[vdev->msix->pba_bar].region.mem);
g_free(vdev->msix->pending);
}
}
@@ -1322,71 +1404,23 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- VFIOBAR *bar = &vdev->bars[i];
-
- if (!bar->region.size) {
- continue;
- }
-
- memory_region_set_enabled(&bar->region.mmap_mem, enabled);
- if (vdev->msix && vdev->msix->table_bar == i) {
- memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
- }
- }
-}
-
-static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
-
- if (!bar->region.size) {
- return;
- }
-
- vfio_bar_quirk_teardown(vdev, nr);
-
- memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
+ vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);
}
}
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- if (!bar->region.size) {
- return;
- }
-
- vfio_bar_quirk_free(vdev, nr);
-
- munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
- }
-}
-
-static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
- uint64_t size = bar->region.size;
- char name[64];
uint32_t pci_bar;
uint8_t type;
int ret;
/* Skip both unimplemented BARs and the upper half of 64bit BARS. */
- if (!size) {
+ if (!bar->region.size) {
return;
}
- snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function, nr);
-
/* Determine what type of BAR this is for registration */
ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
@@ -1401,102 +1435,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
- /* A "slow" read/write mapping underlies all BARs */
- memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops,
- bar, name, size);
- pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
-
- /*
- * We can't mmap areas overlapping the MSIX vector table, so we
- * potentially insert a direct-mapped subregion before and after it.
- */
- if (vdev->msix && vdev->msix->table_bar == nr) {
- size = vdev->msix->table_offset & qemu_real_host_page_mask;
- }
-
- strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
- if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
- &bar->region.mmap_mem, &bar->region.mmap,
- size, 0, name)) {
- error_report("%s unsupported. Performance may be slow", name);
- }
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- uint64_t start;
-
- start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
- (vdev->msix->entries *
- PCI_MSIX_ENTRY_SIZE));
-
- size = start < bar->region.size ? bar->region.size - start : 0;
- strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
- /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
- if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
- &vdev->msix->mmap_mem,
- &vdev->msix->mmap, size, start, name)) {
- error_report("%s unsupported. Performance may be slow", name);
- }
+ if (vfio_region_mmap(&bar->region)) {
+ error_report("Failed to mmap %s BAR %d. Performance may be slow",
+ vdev->vbasedev.name, nr);
}
vfio_bar_quirk_setup(vdev, nr);
+
+ pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
}
-static void vfio_map_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_setup(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_map_bar(vdev, i);
+ vfio_bar_setup(vdev, i);
}
- if (vdev->has_vga) {
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
+ if (vdev->vga) {
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_MEM],
+ &vdev->vga->region[QEMU_PCI_VGA_MEM],
"vfio-vga-mmio@0xa0000",
QEMU_PCI_VGA_MEM_SIZE);
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_IO_LO],
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
"vfio-vga-io@0x3b0",
QEMU_PCI_VGA_IO_LO_SIZE);
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
"vfio-vga-io@0x3c0",
QEMU_PCI_VGA_IO_HI_SIZE);
- pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem);
+ pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
vfio_vga_quirk_setup(vdev);
}
}
-static void vfio_unregister_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_exit(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unregister_bar(vdev, i);
+ vfio_bar_quirk_exit(vdev, i);
+ vfio_region_exit(&vdev->bars[i].region);
}
- if (vdev->has_vga) {
- vfio_vga_quirk_teardown(vdev);
+ if (vdev->vga) {
pci_unregister_vga(&vdev->pdev);
+ vfio_vga_quirk_exit(vdev);
}
}
-static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_finalize(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unmap_bar(vdev, i);
+ vfio_bar_quirk_finalize(vdev, i);
+ vfio_region_finalize(&vdev->bars[i].region);
}
- if (vdev->has_vga) {
- vfio_vga_quirk_free(vdev);
+ if (vdev->vga) {
+ vfio_vga_quirk_finalize(vdev);
+ for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+ object_unparent(OBJECT(&vdev->vga->region[i].mem));
+ }
+ g_free(vdev->vga);
}
}
@@ -1756,9 +1766,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
}
if (ret < 0) {
- error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
- "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function,
+ error_report("vfio: %s Error adding PCI capability "
+ "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name,
cap_id, size, pos, ret);
return ret;
}
@@ -1820,11 +1829,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
vfio_intx_enable(vdev);
}
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
- PCIHostDeviceAddress *host2)
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
{
- return (host1->domain == host2->domain && host1->bus == host2->bus &&
- host1->slot == host2->slot && host1->function == host2->function);
+ char tmp[13];
+
+ sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
+ addr->bus, addr->slot, addr->function);
+
+ return (strcmp(tmp, name) == 0);
}
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
@@ -1849,9 +1861,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
if (ret && errno != ENOSPC) {
ret = -errno;
if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
- "no available reset mechanism.", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function);
+ error_report("vfio: Cannot reset device %s, "
+ "no available reset mechanism.", vdev->vbasedev.name);
}
goto out_single;
}
@@ -1884,7 +1895,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
@@ -1910,7 +1921,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
if (single) {
ret = -EINVAL;
goto out_single;
@@ -1972,7 +1983,7 @@ out:
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
@@ -1991,7 +2002,7 @@ out:
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
vfio_pci_post_reset(tmp);
break;
}
@@ -2044,10 +2055,56 @@ static VFIODeviceOps vfio_pci_ops = {
.vfio_eoi = vfio_intx_eoi,
};
+int vfio_populate_vga(VFIOPCIDevice *vdev)
+{
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ struct vfio_region_info *reg_info;
+ int ret;
+
+ if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
+ ret = vfio_get_region_info(vbasedev,
+ VFIO_PCI_VGA_REGION_INDEX, &reg_info);
+ if (ret) {
+ return ret;
+ }
+
+ if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
+ !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
+ reg_info->size < 0xbffff + 1) {
+ error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
+ (unsigned long)reg_info->flags,
+ (unsigned long)reg_info->size);
+ g_free(reg_info);
+ return -EINVAL;
+ }
+
+ vdev->vga = g_new0(VFIOVGA, 1);
+
+ vdev->vga->fd_offset = reg_info->offset;
+ vdev->vga->fd = vdev->vbasedev.fd;
+
+ g_free(reg_info);
+
+ vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
+
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
+
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
+ }
+
+ return 0;
+}
+
static int vfio_populate_device(VFIOPCIDevice *vdev)
{
VFIODevice *vbasedev = &vdev->vbasedev;
- struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ struct vfio_region_info *reg_info;
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
int i, ret = -1;
@@ -2069,85 +2126,47 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
}
for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
- reg_info.index = i;
+ char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i);
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ &vdev->bars[i].region, i, name);
+ g_free(name);
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
if (ret) {
error_report("vfio: Error getting region %d info: %m", i);
goto error;
}
- trace_vfio_populate_device_region(vbasedev->name, i,
- (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
-
- vdev->bars[i].region.vbasedev = vbasedev;
- vdev->bars[i].region.flags = reg_info.flags;
- vdev->bars[i].region.size = reg_info.size;
- vdev->bars[i].region.fd_offset = reg_info.offset;
- vdev->bars[i].region.nr = i;
QLIST_INIT(&vdev->bars[i].quirks);
}
- reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+ ret = vfio_get_region_info(vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX, &reg_info);
if (ret) {
error_report("vfio: Error getting config info: %m");
goto error;
}
trace_vfio_populate_device_config(vdev->vbasedev.name,
- (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
+ (unsigned long)reg_info->size,
+ (unsigned long)reg_info->offset,
+ (unsigned long)reg_info->flags);
- vdev->config_size = reg_info.size;
+ vdev->config_size = reg_info->size;
if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
}
- vdev->config_offset = reg_info.offset;
+ vdev->config_offset = reg_info->offset;
- if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) &&
- vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
- struct vfio_region_info vga_info = {
- .argsz = sizeof(vga_info),
- .index = VFIO_PCI_VGA_REGION_INDEX,
- };
+ g_free(reg_info);
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info);
+ if (vdev->features & VFIO_FEATURE_ENABLE_VGA) {
+ ret = vfio_populate_vga(vdev);
if (ret) {
error_report(
"vfio: Device does not support requested feature x-vga");
goto error;
}
-
- if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) ||
- !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) ||
- vga_info.size < 0xbffff + 1) {
- error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
- (unsigned long)vga_info.flags,
- (unsigned long)vga_info.size);
- goto error;
- }
-
- vdev->vga.fd_offset = vga_info.offset;
- vdev->vga.fd = vdev->vbasedev.fd;
-
- vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
- vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks);
-
- vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
- vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks);
-
- vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
- vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks);
-
- vdev->has_vga = true;
}
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
@@ -2172,11 +2191,8 @@ error:
static void vfio_put_device(VFIOPCIDevice *vdev)
{
g_free(vdev->vbasedev.name);
- if (vdev->msix) {
- object_unparent(OBJECT(&vdev->msix->mmap_mem));
- g_free(vdev->msix);
- vdev->msix = NULL;
- }
+ g_free(vdev->msix);
+
vfio_put_base_device(&vdev->vbasedev);
}
@@ -2197,10 +2213,7 @@ static void vfio_err_notifier_handler(void *opaque)
* guest to contain the error.
*/
- error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
- "Please collect any data possible and then kill the guest",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
@@ -2381,42 +2394,43 @@ static int vfio_initfn(PCIDevice *pdev)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char *tmp, group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
int ret;
- /* Check that the host device exists */
- snprintf(path, sizeof(path),
- "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
- if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
+ if (!vdev->vbasedev.sysfsdev) {
+ vdev->vbasedev.sysfsdev =
+ g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
+ vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+ }
+
+ if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
+ error_report("vfio: error: no such host device: %s",
+ vdev->vbasedev.sysfsdev);
return -errno;
}
+ vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
vdev->vbasedev.ops = &vfio_pci_ops;
-
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
- strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+ tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
- len = readlink(path, iommu_group_path, sizeof(path));
- if (len <= 0 || len >= sizeof(path)) {
+ if (len <= 0 || len >= sizeof(group_path)) {
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
- iommu_group_path[len] = 0;
- group_name = basename(iommu_group_path);
+ group_path[len] = 0;
+ group_name = basename(group_path);
if (sscanf(group_name, "%d", &groupid) != 1) {
- error_report("vfio: error reading %s: %m", path);
+ error_report("vfio: error reading %s: %m", group_path);
return -errno;
}
@@ -2428,21 +2442,18 @@ static int vfio_initfn(PCIDevice *pdev)
return -ENOENT;
}
- snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
-
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
- error_report("vfio: error: device %s is already attached", path);
+ error_report("vfio: error: device %s is already attached",
+ vdev->vbasedev.name);
vfio_put_group(group);
return -EBUSY;
}
}
- ret = vfio_get_device(group, path, &vdev->vbasedev);
+ ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev);
if (ret) {
- error_report("vfio: failed to get device %s", path);
+ error_report("vfio: failed to get device %s", vdev->vbasedev.name);
vfio_put_group(group);
return ret;
}
@@ -2542,7 +2553,7 @@ static int vfio_initfn(PCIDevice *pdev)
return ret;
}
- vfio_map_bars(vdev);
+ vfio_bars_setup(vdev);
ret = vfio_add_capabilities(vdev);
if (ret) {
@@ -2579,7 +2590,7 @@ static int vfio_initfn(PCIDevice *pdev)
out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
- vfio_unregister_bars(vdev);
+ vfio_bars_exit(vdev);
return ret;
}
@@ -2589,7 +2600,7 @@ static void vfio_instance_finalize(Object *obj)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
VFIOGroup *group = vdev->vbasedev.group;
- vfio_unmap_bars(vdev);
+ vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
vfio_put_device(vdev);
@@ -2608,7 +2619,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
- vfio_unregister_bars(vdev);
+ vfio_bars_exit(vdev);
}
static void vfio_pci_reset(DeviceState *dev)
@@ -2659,6 +2670,7 @@ static void vfio_instance_init(Object *obj)
static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
+ DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,