diff options
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r-- | hw/vfio/pci.c | 528 |
1 files changed, 270 insertions, 258 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 20b505f4ec..d091d8cf0e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - struct vfio_region_info reg_info = { - .argsz = sizeof(reg_info), - .index = VFIO_PCI_ROM_REGION_INDEX - }; + struct vfio_region_info *reg_info; uint64_t size; off_t off = 0; ssize_t bytes; - if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { + if (vfio_get_region_info(&vdev->vbasedev, + VFIO_PCI_ROM_REGION_INDEX, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); + + vdev->rom_size = size = reg_info->size; + vdev->rom_offset = reg_info->offset; - vdev->rom_size = size = reg_info.size; - vdev->rom_offset = reg_info.offset; + g_free(reg_info); if (!vdev->rom_size) { vdev->rom_read_failed = true; @@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) break; } } + + /* + * Test the ROM signature against our device, if the vendor is correct + * but the device ID doesn't match, store the correct device ID and + * recompute the checksum. Intel IGD devices need this and are known + * to have bogus checksums so we can't simply adjust the checksum. + */ + if (pci_get_word(vdev->rom) == 0xaa55 && + pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size && + !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) { + uint16_t vid, did; + + vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4); + did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6); + + if (vid == vdev->vendor_id && did != vdev->device_id) { + int i; + uint8_t csum, *data = vdev->rom; + + pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6, + vdev->device_id); + data[6] = 0; + + for (csum = 0, i = 0; i < vdev->rom_size; i++) { + csum += data[i]; + } + + data[6] = -csum; + } + } } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) @@ -889,18 +919,14 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev = DEVICE(vdev); - char name[32]; + char *name; int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified romfile\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n", + vdev->vbasedev.name); } return; } @@ -913,9 +939,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) pwrite(fd, &size, 4, offset) != 4 || pread(fd, &size, 4, offset) != 4 || pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%04x:%02x:%02x.%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); return; } @@ -927,32 +951,22 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vfio_blacklist_opt_rom(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified non zero value for " - "rombar\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n", + vdev->vbasedev.name); } else { - error_printf("Warning : Rom loading for device at " - "%04x:%02x:%02x.%x has been disabled due to " - "system instability issues. " - "Specify rombar=1 or romfile to force\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n", + vdev->vbasedev.name); return; } } trace_vfio_pci_size_rom(vdev->vbasedev.name, size); - snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); + g_free(name); pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); @@ -1063,9 +1077,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) ret = pread(vdev->vbasedev.fd, &phys_val, len, vdev->config_offset + addr); if (ret != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, len); + error_report("%s(%s, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, len); return -errno; } phys_val = le32_to_cpu(phys_val); @@ -1089,9 +1102,8 @@ void vfio_pci_write_config(PCIDevice *pdev, /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, val, len); + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, val, len); } /* MSI/MSI-X Enabling/Disabling */ @@ -1185,6 +1197,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) return 0; } +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) +{ + off_t start, end; + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; + + /* + * We expect to find a single mmap covering the whole BAR, anything else + * means it's either unsupported or already setup. + */ + if (region->nr_mmaps != 1 || region->mmaps[0].offset || + region->size != region->mmaps[0].size) { + return; + } + + /* MSI-X table start and end aligned to host page size */ + start = vdev->msix->table_offset & qemu_real_host_page_mask; + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + + /* + * Does the MSI-X table cover the beginning of the BAR? The whole BAR? + * NB - Host page size is necessarily a power of two and so is the PCI + * BAR (not counting EA yet), therefore if we have host page aligned + * @start and @end, then any remainder of the BAR before or after those + * must be at least host page sized and therefore mmap'able. + */ + if (!start) { + if (end >= region->size) { + region->nr_mmaps = 0; + g_free(region->mmaps); + region->mmaps = NULL; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, 0, 0); + } else { + region->mmaps[0].offset = end; + region->mmaps[0].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + } + + /* Maybe it's aligned at the end of the BAR */ + } else if (end >= region->size) { + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + /* Otherwise it must split the BAR */ + } else { + region->nr_mmaps = 2; + region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); + + memcpy(®ion->mmaps[1], ®ion->mmaps[0], sizeof(VFIOMmap)); + + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + region->mmaps[1].offset = end; + region->mmaps[1].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[1].offset, + region->mmaps[1].offset + region->mmaps[1].size); + } +} + /* * We don't have any control over how pci_add_capability() inserts * capabilities into the chain. In order to setup MSI-X we need a @@ -1259,6 +1339,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) msix->table_offset, msix->entries); vdev->msix = msix; + vfio_pci_fixup_msix_region(vdev); + return 0; } @@ -1269,9 +1351,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -1308,8 +1390,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) if (vdev->msix) { msix_uninit(&vdev->pdev, - &vdev->bars[vdev->msix->table_bar].region.mem, - &vdev->bars[vdev->msix->pba_bar].region.mem); + vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem); g_free(vdev->msix->pending); } } @@ -1322,71 +1404,23 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - VFIOBAR *bar = &vdev->bars[i]; - - if (!bar->region.size) { - continue; - } - - memory_region_set_enabled(&bar->region.mmap_mem, enabled); - if (vdev->msix && vdev->msix->table_bar == i) { - memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); - } - } -} - -static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_teardown(vdev, nr); - - memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); - - if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); + vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); } } -static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) +static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_free(vdev, nr); - - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); - - if (vdev->msix && vdev->msix->table_bar == nr) { - munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); - } -} - -static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - uint64_t size = bar->region.size; - char name[64]; uint32_t pci_bar; uint8_t type; int ret; /* Skip both unimplemented BARs and the upper half of 64bit BARS. */ - if (!size) { + if (!bar->region.size) { return; } - snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); - /* Determine what type of BAR this is for registration */ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); @@ -1401,102 +1435,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); - /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, - bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); - - /* - * We can't mmap areas overlapping the MSIX vector table, so we - * potentially insert a direct-mapped subregion before and after it. - */ - if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_real_host_page_mask; - } - - strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &bar->region.mmap_mem, &bar->region.mmap, - size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } - - if (vdev->msix && vdev->msix->table_bar == nr) { - uint64_t start; - - start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * - PCI_MSIX_ENTRY_SIZE)); - - size = start < bar->region.size ? bar->region.size - start : 0; - strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); - /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &vdev->msix->mmap_mem, - &vdev->msix->mmap, size, start, name)) { - error_report("%s unsupported. Performance may be slow", name); - } + if (vfio_region_mmap(&bar->region)) { + error_report("Failed to mmap %s BAR %d. Performance may be slow", + vdev->vbasedev.name, nr); } vfio_bar_quirk_setup(vdev, nr); + + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); } -static void vfio_map_bars(VFIOPCIDevice *vdev) +static void vfio_bars_setup(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_map_bar(vdev, i); + vfio_bar_setup(vdev, i); } - if (vdev->has_vga) { - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem, + if (vdev->vga) { + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_MEM], + &vdev->vga->region[QEMU_PCI_VGA_MEM], "vfio-vga-mmio@0xa0000", QEMU_PCI_VGA_MEM_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO], + &vdev->vga->region[QEMU_PCI_VGA_IO_LO], "vfio-vga-io@0x3b0", QEMU_PCI_VGA_IO_LO_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI], + &vdev->vga->region[QEMU_PCI_VGA_IO_HI], "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem); + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); vfio_vga_quirk_setup(vdev); } } -static void vfio_unregister_bars(VFIOPCIDevice *vdev) +static void vfio_bars_exit(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unregister_bar(vdev, i); + vfio_bar_quirk_exit(vdev, i); + vfio_region_exit(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_teardown(vdev); + if (vdev->vga) { pci_unregister_vga(&vdev->pdev); + vfio_vga_quirk_exit(vdev); } } -static void vfio_unmap_bars(VFIOPCIDevice *vdev) +static void vfio_bars_finalize(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unmap_bar(vdev, i); + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_free(vdev); + if (vdev->vga) { + vfio_vga_quirk_finalize(vdev); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + object_unparent(OBJECT(&vdev->vga->region[i].mem)); + } + g_free(vdev->vga); } } @@ -1756,9 +1766,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) } if (ret < 0) { - error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability " - "0x%x[0x%x]@0x%x: %d", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, + error_report("vfio: %s Error adding PCI capability " + "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name, cap_id, size, pos, ret); return ret; } @@ -1820,11 +1829,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, - PCIHostDeviceAddress *host2) +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) { - return (host1->domain == host2->domain && host1->bus == host2->bus && - host1->slot == host2->slot && host1->function == host2->function); + char tmp[13]; + + sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain, + addr->bus, addr->slot, addr->function); + + return (strcmp(tmp, name) == 0); } static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) @@ -1849,9 +1861,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (ret && errno != ENOSPC) { ret = -errno; if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, " - "no available reset mechanism.", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); } goto out_single; } @@ -1884,7 +1895,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) trace_vfio_pci_hot_reset_dep_devices(host.domain, host.bus, host.slot, host.function, devices[i].group_id); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1910,7 +1921,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { if (single) { ret = -EINVAL; goto out_single; @@ -1972,7 +1983,7 @@ out: host.slot = PCI_SLOT(devices[i].devfn); host.function = PCI_FUNC(devices[i].devfn); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1991,7 +2002,7 @@ out: continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { vfio_pci_post_reset(tmp); break; } @@ -2044,10 +2055,56 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_eoi = vfio_intx_eoi, }; +int vfio_populate_vga(VFIOPCIDevice *vdev) +{ + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info; + int ret; + + if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (ret) { + return ret; + } + + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { + error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + return -EINVAL; + } + + vdev->vga = g_new0(VFIOVGA, 1); + + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; + + g_free(reg_info); + + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + } + + return 0; +} + static int vfio_populate_device(VFIOPCIDevice *vdev) { VFIODevice *vbasedev = &vdev->vbasedev; - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_region_info *reg_info; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; int i, ret = -1; @@ -2069,85 +2126,47 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { - reg_info.index = i; + char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); + + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + &vdev->bars[i].region, i, name); + g_free(name); - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } - trace_vfio_populate_device_region(vbasedev->name, i, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); - - vdev->bars[i].region.vbasedev = vbasedev; - vdev->bars[i].region.flags = reg_info.flags; - vdev->bars[i].region.size = reg_info.size; - vdev->bars[i].region.fd_offset = reg_info.offset; - vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); } - reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, ®_info); if (ret) { error_report("vfio: Error getting config info: %m"); goto error; } trace_vfio_populate_device_config(vdev->vbasedev.name, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); - vdev->config_size = reg_info.size; + vdev->config_size = reg_info->size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; } - vdev->config_offset = reg_info.offset; + vdev->config_offset = reg_info->offset; - if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && - vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { - struct vfio_region_info vga_info = { - .argsz = sizeof(vga_info), - .index = VFIO_PCI_VGA_REGION_INDEX, - }; + g_free(reg_info); - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); + if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { + ret = vfio_populate_vga(vdev); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); goto error; } - - if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) || - !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) || - vga_info.size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)vga_info.flags, - (unsigned long)vga_info.size); - goto error; - } - - vdev->vga.fd_offset = vga_info.offset; - vdev->vga.fd = vdev->vbasedev.fd; - - vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks); - - vdev->has_vga = true; } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; @@ -2172,11 +2191,8 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); - if (vdev->msix) { - object_unparent(OBJECT(&vdev->msix->mmap_mem)); - g_free(vdev->msix); - vdev->msix = NULL; - } + g_free(vdev->msix); + vfio_put_base_device(&vdev->vbasedev); } @@ -2197,10 +2213,7 @@ static void vfio_err_notifier_handler(void *opaque) * guest to contain the error. */ - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " - "Please collect any data possible and then kill the guest", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2381,42 +2394,43 @@ static int vfio_initfn(PCIDevice *pdev) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* Check that the host device exists */ - snprintf(path, sizeof(path), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (!vdev->vbasedev.sysfsdev) { + vdev->vbasedev.sysfsdev = + g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + } + + if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vdev->vbasedev.sysfsdev); return -errno; } + vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev)); vdev->vbasedev.ops = &vfio_pci_ops; - vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; - vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); - strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); + tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); - len = readlink(path, iommu_group_path, sizeof(path)); - if (len <= 0 || len >= sizeof(path)) { + if (len <= 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -2428,21 +2442,18 @@ static int vfio_initfn(PCIDevice *pdev) return -ENOENT; } - snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vdev->vbasedev.name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, &vdev->vbasedev); + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vdev->vbasedev.name); vfio_put_group(group); return ret; } @@ -2542,7 +2553,7 @@ static int vfio_initfn(PCIDevice *pdev) return ret; } - vfio_map_bars(vdev); + vfio_bars_setup(vdev); ret = vfio_add_capabilities(vdev); if (ret) { @@ -2579,7 +2590,7 @@ static int vfio_initfn(PCIDevice *pdev) out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); return ret; } @@ -2589,7 +2600,7 @@ static void vfio_instance_finalize(Object *obj) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); VFIOGroup *group = vdev->vbasedev.group; - vfio_unmap_bars(vdev); + vfio_bars_finalize(vdev); g_free(vdev->emulated_config_bits); g_free(vdev->rom); vfio_put_device(vdev); @@ -2608,7 +2619,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); } static void vfio_pci_reset(DeviceState *dev) @@ -2659,6 +2670,7 @@ static void vfio_instance_init(Object *obj) static Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, |