about | summary | refs | log | tree | commit | diff
path: root/hw/vfio/container.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/container.c')
-rw-r--r-- hw/vfio/container.c | 329
1 files changed, 249 insertions, 80 deletions
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 242010036a..b22feb8ded 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -33,6 +33,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
+#include "pci.h"
VFIOGroupList vfio_group_list =
QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -60,10 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
}
}
-static int vfio_dma_unmap_bitmap(VFIOContainer *container,
+static int vfio_dma_unmap_bitmap(const VFIOContainer *container,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb)
{
+ const VFIOContainerBase *bcontainer = &container->bcontainer;
struct vfio_iommu_type1_dma_unmap *unmap;
struct vfio_bitmap *bitmap;
VFIOBitmap vbmap;
@@ -91,7 +93,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
bitmap->size = vbmap.size;
bitmap->data = (__u64 *)vbmap.bitmap;
- if (vbmap.size > container->max_dirty_bitmap_size) {
+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
ret = -E2BIG;
goto unmap_exit;
@@ -115,9 +117,12 @@ unmap_exit:
/*
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
*/
-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
- ram_addr_t size, IOMMUTLBEntry *iotlb)
+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
{
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+ bcontainer);
struct vfio_iommu_type1_dma_unmap unmap = {
.argsz = sizeof(unmap),
.flags = 0,
@@ -127,9 +132,9 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
bool need_dirty_sync = false;
int ret;
- if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
- if (!vfio_devices_all_device_dirty_tracking(container) &&
- container->dirty_pages_supported) {
+ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) {
+ if (!vfio_devices_all_device_dirty_tracking(bcontainer) &&
+ bcontainer->dirty_pages_supported) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
@@ -151,8 +156,8 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
*/
if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
container->iommu_type == VFIO_TYPE1v2_IOMMU) {
- trace_vfio_dma_unmap_overflow_workaround();
- unmap.size -= 1ULL << ctz64(container->pgsizes);
+ trace_vfio_legacy_dma_unmap_overflow_workaround();
+ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
continue;
}
error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
@@ -160,7 +165,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
}
if (need_dirty_sync) {
- ret = vfio_get_dirty_bitmap(container, iova, size,
+ ret = vfio_get_dirty_bitmap(bcontainer, iova, size,
iotlb->translated_addr);
if (ret) {
return ret;
@@ -170,9 +175,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
return 0;
}
-int vfio_dma_map(VFIOContainer *container, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly)
{
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+ bcontainer);
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
.flags = VFIO_DMA_MAP_FLAG_READ,
@@ -191,7 +198,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
* the VGA ROM space.
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
+ (errno == EBUSY &&
+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
@@ -200,17 +208,17 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
return -errno;
}
-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
+static int
+vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+ bool start)
{
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+ bcontainer);
int ret;
struct vfio_iommu_type1_dirty_bitmap dirty = {
.argsz = sizeof(dirty),
};
- if (!container->dirty_pages_supported) {
- return 0;
- }
-
if (start) {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
} else {
@@ -227,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
return ret;
}
-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
- hwaddr iova, hwaddr size)
+static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+ VFIOBitmap *vbmap,
+ hwaddr iova, hwaddr size)
{
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+ bcontainer);
struct vfio_iommu_type1_dirty_bitmap *dbitmap;
struct vfio_iommu_type1_dirty_bitmap_get *range;
int ret;
@@ -296,7 +307,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
}
static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
- VFIOContainer *container)
+ VFIOContainerBase *bcontainer)
{
struct vfio_info_cap_header *hdr;
struct vfio_iommu_type1_info_cap_iova_range *cap;
@@ -314,8 +325,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
range_set_bounds(range, cap->iova_ranges[i].start,
cap->iova_ranges[i].end);
- container->iova_ranges =
- range_list_insert(container->iova_ranges, range);
+ bcontainer->iova_ranges =
+ range_list_insert(bcontainer->iova_ranges, range);
}
return true;
@@ -442,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
{
struct vfio_info_cap_header *hdr;
struct vfio_iommu_type1_info_cap_migration *cap_mig;
+ VFIOContainerBase *bcontainer = &container->bcontainer;
hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
if (!hdr) {
@@ -456,22 +468,17 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
* qemu_real_host_page_size to mark those dirty.
*/
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
- container->dirty_pages_supported = true;
- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
- container->dirty_pgsizes = cap_mig->pgsize_bitmap;
+ bcontainer->dirty_pages_supported = true;
+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
}
}
-static void vfio_free_container(VFIOContainer *container)
-{
- g_list_free_full(container->iova_ranges, g_free);
- g_free(container);
-}
-
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
Error **errp)
{
VFIOContainer *container;
+ VFIOContainerBase *bcontainer;
int ret, fd;
VFIOAddressSpace *space;
@@ -508,7 +515,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
* details once we know which type of IOMMU we are using.
*/
- QLIST_FOREACH(container, &space->containers, next) {
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
ret = vfio_ram_block_discard_disable(container, true);
if (ret) {
@@ -544,14 +552,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
}
container = g_malloc0(sizeof(*container));
- container->space = space;
container->fd = fd;
- container->error = NULL;
- container->dirty_pages_supported = false;
- container->dma_max_mappings = 0;
- container->iova_ranges = NULL;
- QLIST_INIT(&container->giommu_list);
- QLIST_INIT(&container->vrdl_list);
+ bcontainer = &container->bcontainer;
+ vfio_container_init(bcontainer, space, &vfio_legacy_ops);
ret = vfio_init_container(container, group->fd, errp);
if (ret) {
@@ -577,16 +580,16 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
}
if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
- container->pgsizes = info->iova_pgsizes;
+ bcontainer->pgsizes = info->iova_pgsizes;
} else {
- container->pgsizes = qemu_real_host_page_size();
+ bcontainer->pgsizes = qemu_real_host_page_size();
}
- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
- container->dma_max_mappings = 65535;
+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
+ bcontainer->dma_max_mappings = 65535;
}
- vfio_get_info_iova_range(info, container);
+ vfio_get_info_iova_range(info, bcontainer);
vfio_get_iommu_info_migration(container, info);
g_free(info);
@@ -606,30 +609,29 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_kvm_device_add_group(group);
QLIST_INIT(&container->group_list);
- QLIST_INSERT_HEAD(&space->containers, container, next);
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
- container->listener = vfio_memory_listener;
+ bcontainer->listener = vfio_memory_listener;
+ memory_listener_register(&bcontainer->listener, bcontainer->space->as);
- memory_listener_register(&container->listener, container->space->as);
-
- if (container->error) {
+ if (bcontainer->error) {
ret = -1;
- error_propagate_prepend(errp, container->error,
+ error_propagate_prepend(errp, bcontainer->error,
"memory listener initialization failed: ");
goto listener_release_exit;
}
- container->initialized = true;
+ bcontainer->initialized = true;
return 0;
listener_release_exit:
QLIST_REMOVE(group, container_next);
- QLIST_REMOVE(container, next);
+ QLIST_REMOVE(bcontainer, next);
vfio_kvm_device_del_group(group);
- memory_listener_unregister(&container->listener);
+ memory_listener_unregister(&bcontainer->listener);
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
vfio_spapr_container_deinit(container);
@@ -639,7 +641,7 @@ enable_discards_exit:
vfio_ram_block_discard_disable(container, false);
free_container_exit:
- vfio_free_container(container);
+ g_free(container);
close_fd_exit:
close(fd);
@@ -653,6 +655,7 @@ put_space_exit:
static void vfio_disconnect_container(VFIOGroup *group)
{
VFIOContainer *container = group->container;
+ VFIOContainerBase *bcontainer = &container->bcontainer;
QLIST_REMOVE(group, container_next);
group->container = NULL;
@@ -663,7 +666,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
* group.
*/
if (QLIST_EMPTY(&container->group_list)) {
- memory_listener_unregister(&container->listener);
+ memory_listener_unregister(&bcontainer->listener);
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
vfio_spapr_container_deinit(container);
@@ -676,21 +679,13 @@ static void vfio_disconnect_container(VFIOGroup *group)
}
if (QLIST_EMPTY(&container->group_list)) {
- VFIOAddressSpace *space = container->space;
- VFIOGuestIOMMU *giommu, *tmp;
+ VFIOAddressSpace *space = bcontainer->space;
- QLIST_REMOVE(container, next);
-
- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
- memory_region_unregister_iommu_notifier(
- MEMORY_REGION(giommu->iommu_mr), &giommu->n);
- QLIST_REMOVE(giommu, giommu_next);
- g_free(giommu);
- }
+ vfio_container_destroy(bcontainer);
trace_vfio_disconnect_container(container->fd);
close(container->fd);
- vfio_free_container(container);
+ g_free(container);
vfio_put_address_space(space);
}
@@ -705,7 +700,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
QLIST_FOREACH(group, &vfio_group_list, next) {
if (group->groupid == groupid) {
/* Found it. Now is it already in the right context? */
- if (group->container->space->as == as) {
+ if (group->container->bcontainer.space->as == as) {
return group;
} else {
error_setg(errp, "group %d used in multiple address spaces",
@@ -877,13 +872,13 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
* @name and @vbasedev->name are likely to be different depending
* on the type of the device, hence the need for passing @name
*/
-int vfio_attach_device(char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp)
+static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
{
int groupid = vfio_device_groupid(vbasedev, errp);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- VFIOContainer *container;
+ VFIOContainerBase *bcontainer;
int ret;
if (groupid < 0) {
@@ -910,26 +905,200 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
return ret;
}
- container = group->container;
- vbasedev->container = container;
- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next);
+ bcontainer = &group->container->bcontainer;
+ vbasedev->bcontainer = bcontainer;
+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
return ret;
}
-void vfio_detach_device(VFIODevice *vbasedev)
+static void vfio_legacy_detach_device(VFIODevice *vbasedev)
{
VFIOGroup *group = vbasedev->group;
- if (!vbasedev->container) {
- return;
- }
-
QLIST_REMOVE(vbasedev, global_next);
QLIST_REMOVE(vbasedev, container_next);
- vbasedev->container = NULL;
+ vbasedev->bcontainer = NULL;
trace_vfio_detach_device(vbasedev->name, group->groupid);
vfio_put_base_device(vbasedev);
vfio_put_group(group);
}
+
+/*
+ * PCI hot (bus) reset for the legacy backend.
+ *
+ * @vbasedev: device requesting the reset; it must be the vbasedev member
+ *            embedded in a VFIOPCIDevice.
+ * @single:   true when the caller expects @vbasedev to be the only device
+ *            affected by the reset; false for a multi-device reset.
+ *
+ * The list of dependent devices is obtained with
+ * vfio_pci_get_pci_hot_reset_info().  Every dependent device other than
+ * @vbasedev must belong to a group present in vfio_group_list; the fds of
+ * all matching groups are then handed to VFIO_DEVICE_PCI_HOT_RESET.
+ *
+ * When @single is false, vfio_pci_pre_reset()/vfio_pci_post_reset() are
+ * called here for @vbasedev as well as for the dependent devices found.
+ * When @single is true they are not called for @vbasedev (presumably the
+ * caller does that itself -- confirm against the callers).
+ *
+ * Returns 0 on success or a negative value on failure:
+ *   - whatever vfio_pci_get_pci_hot_reset_info() returned, if non-zero;
+ *   - -EPERM if a dependent device's group is not in vfio_group_list;
+ *   - -EINVAL if @single is true but another dependent device was found in
+ *     one of our groups, or @single is false but none was found;
+ *   - -errno from the VFIO_DEVICE_PCI_HOT_RESET ioctl.
+ */
+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    bool multi = false;
+
+    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+    if (!single) {
+        vfio_pci_pre_reset(vdev);
+    }
+    vdev->vbasedev.needs_reset = false;
+
+    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+    if (ret) {
+        /* info is still NULL here; g_free(NULL) at out_single is a no-op. */
+        goto out_single;
+    }
+    devices = &info->devices[0];
+
+    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        trace_vfio_pci_hot_reset_dep_devices(host.domain,
+                host.bus, host.slot, host.function, devices[i].group_id);
+
+        /* The requesting device itself needs no ownership check. */
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+            continue;
+        }
+
+        /* group is NULL after the loop if no owned group matched. */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            if (!vdev->has_pm_reset) {
+                error_report("vfio: Cannot reset device %s, "
+                             "depends on group %d which is not owned.",
+                             vdev->vbasedev.name, devices[i].group_id);
+            }
+            ret = -EPERM;
+            goto out;
+        }
+
+        /* Prep dependent devices for reset and clear our marker. */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (!vbasedev_iter->dev->realized ||
+                vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+                /* A single-device reset must not affect another device. */
+                if (single) {
+                    ret = -EINVAL;
+                    goto out_single;
+                }
+                vfio_pci_pre_reset(tmp);
+                tmp->vbasedev.needs_reset = false;
+                multi = true;
+                break;
+            }
+        }
+    }
+
+    /* A multi-device reset that found no other device is a mismatch. */
+    if (!single && !multi) {
+        ret = -EINVAL;
+        goto out_single;
+    }
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    if (ret) {
+        /* Capture errno before any later call can overwrite it. */
+        ret = -errno;
+    }
+    g_free(reset);
+
+    /* Report the saved error code rather than the current errno. */
+    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+                                    ret ? strerror(-ret) : "Success");
+
+out:
+    /* Re-enable INTx on affected devices */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+            continue;
+        }
+
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        /* Stop at the first group we do not own, as the check above did. */
+        if (!group) {
+            break;
+        }
+
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (!vbasedev_iter->dev->realized ||
+                vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+                vfio_pci_post_reset(tmp);
+                break;
+            }
+        }
+    }
+out_single:
+    if (!single) {
+        vfio_pci_post_reset(vdev);
+    }
+    g_free(info);
+
+    return ret;
+}
+
+/*
+ * VFIOIOMMUOps table for the legacy backend: each hook points at the
+ * matching vfio_legacy_* implementation.
+ */
+const VFIOIOMMUOps vfio_legacy_ops = {
+    /* Device attach / detach */
+    .attach_device = vfio_legacy_attach_device,
+    .detach_device = vfio_legacy_detach_device,
+    /* DMA mapping */
+    .dma_map = vfio_legacy_dma_map,
+    .dma_unmap = vfio_legacy_dma_unmap,
+    /* Dirty page tracking */
+    .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
+    .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
+    /* PCI reset */
+    .pci_hot_reset = vfio_legacy_pci_hot_reset,
+};