Diffstat (limited to 'hw/vfio/common.c')
 hw/vfio/common.c | 699 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 569 insertions(+), 130 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index bab83c0e55..4d01ea3515 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -42,6 +42,7 @@
#include "migration/migration.h"
#include "migration/misc.h"
#include "migration/blocker.h"
+#include "migration/qemu-file.h"
#include "sysemu/tpm.h"
VFIOGroupList vfio_group_list =
@@ -319,6 +320,28 @@ const MemoryRegionOps vfio_region_ops = {
* Device state interfaces
*/
+typedef struct {
+ unsigned long *bitmap;
+ hwaddr size;
+ hwaddr pages;
+} VFIOBitmap;
+
+static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
+{
+ vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
+ vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ vbmap->bitmap = g_try_malloc0(vbmap->size);
+ if (!vbmap->bitmap) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+ uint64_t size, ram_addr_t ram_addr);
+
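
For a sense of scale, here is the arithmetic vfio_bitmap_alloc() performs, as a standalone sketch assuming a 4 KiB host page size (the 1 GiB mapping and the local macros are illustrative, not part of the patch):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_BYTE  8
    #define ROUND_UP(n, m) ((((n) + (m) - 1) / (m)) * (m))

    int main(void)
    {
        uint64_t page_size = 4096;          /* qemu_real_host_page_size() stand-in */
        uint64_t size = 1ULL << 30;         /* hypothetical 1 GiB DMA mapping */
        uint64_t pages = size / page_size;  /* 262144 pages to track */

        /* One dirty bit per page, rounded up to whole 64-bit words: */
        uint64_t bytes = ROUND_UP(pages, sizeof(uint64_t) * BITS_PER_BYTE) /
                         BITS_PER_BYTE;

        printf("pages=%" PRIu64 " bitmap=%" PRIu64 " bytes\n", pages, bytes);
        return 0;   /* prints: pages=262144 bitmap=32768 bytes */
    }
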
bool vfio_mig_active(void)
{
VFIOGroup *group;
@@ -339,6 +362,7 @@ bool vfio_mig_active(void)
}
static Error *multiple_devices_migration_blocker;
+static Error *giommu_migration_blocker;
static unsigned int vfio_migratable_device_num(void)
{
@@ -390,6 +414,64 @@ void vfio_unblock_multiple_devices_migration(void)
multiple_devices_migration_blocker = NULL;
}
+static bool vfio_viommu_preset(void)
+{
+ VFIOAddressSpace *space;
+
+ QLIST_FOREACH(space, &vfio_address_spaces, list) {
+ if (space->as != &address_space_memory) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int vfio_block_giommu_migration(Error **errp)
+{
+ int ret;
+
+ if (giommu_migration_blocker ||
+ !vfio_viommu_preset()) {
+ return 0;
+ }
+
+ error_setg(&giommu_migration_blocker,
+ "Migration is currently not supported with vIOMMU enabled");
+ ret = migrate_add_blocker(giommu_migration_blocker, errp);
+ if (ret < 0) {
+ error_free(giommu_migration_blocker);
+ giommu_migration_blocker = NULL;
+ }
+
+ return ret;
+}
+
+void vfio_migration_finalize(void)
+{
+ if (!giommu_migration_blocker ||
+ vfio_viommu_preset()) {
+ return;
+ }
+
+ migrate_del_blocker(giommu_migration_blocker);
+ error_free(giommu_migration_blocker);
+ giommu_migration_blocker = NULL;
+}
+
+static void vfio_set_migration_error(int err)
+{
+ MigrationState *ms = migrate_get_current();
+
+ if (migration_is_setup_or_active(ms->state)) {
+ WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
+ if (ms->to_dst_file) {
+ qemu_file_set_error(ms->to_dst_file, err);
+ }
+ }
+ }
+}
+
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
{
VFIOGroup *group;
@@ -417,6 +499,22 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
return true;
}
+static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (!vbasedev->dirty_pages_supported) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
/*
* Check if all VFIO devices are running and migration is active, which is
* essentially equivalent to the migration being in pre-copy phase.
@@ -454,9 +552,14 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
{
struct vfio_iommu_type1_dma_unmap *unmap;
struct vfio_bitmap *bitmap;
- uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
+ VFIOBitmap vbmap;
int ret;
+ ret = vfio_bitmap_alloc(&vbmap, size);
+ if (ret) {
+ return ret;
+ }
+
unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
@@ -470,35 +573,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
* qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
* to qemu_real_host_page_size.
*/
-
bitmap->pgsize = qemu_real_host_page_size();
- bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
+ bitmap->size = vbmap.size;
+ bitmap->data = (__u64 *)vbmap.bitmap;
- if (bitmap->size > container->max_dirty_bitmap_size) {
- error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
- (uint64_t)bitmap->size);
+ if (vbmap.size > container->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
ret = -E2BIG;
goto unmap_exit;
}
- bitmap->data = g_try_malloc0(bitmap->size);
- if (!bitmap->data) {
- ret = -ENOMEM;
- goto unmap_exit;
- }
-
ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
if (!ret) {
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
- iotlb->translated_addr, pages);
+ cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
+ iotlb->translated_addr, vbmap.pages);
} else {
error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
}
- g_free(bitmap->data);
unmap_exit:
g_free(unmap);
+ g_free(vbmap.bitmap);
+
return ret;
}
@@ -515,10 +611,16 @@ static int vfio_dma_unmap(VFIOContainer *container,
.iova = iova,
.size = size,
};
+ bool need_dirty_sync = false;
+ int ret;
- if (iotlb && container->dirty_pages_supported &&
- vfio_devices_all_running_and_mig_active(container)) {
- return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+ if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
+ if (!vfio_devices_all_device_dirty_tracking(container) &&
+ container->dirty_pages_supported) {
+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+ }
+
+ need_dirty_sync = true;
}
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
@@ -544,10 +646,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
return -errno;
}
- if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
- cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
- tcg_enabled() ? DIRTY_CLIENTS_ALL :
- DIRTY_CLIENTS_NOCODE);
+ if (need_dirty_sync) {
+ ret = vfio_get_dirty_bitmap(container, iova, size,
+ iotlb->translated_addr);
+ if (ret) {
+ return ret;
+ }
}
return 0;
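
The branch structure above can be read as a policy: take the atomic unmap-with-bitmap path only when the container tracks dirty pages and at least one device cannot report its own, and otherwise unmap first and sync the dirty bitmap afterwards. A hedged restatement (the helper itself is hypothetical; the names it calls are from the patch):

    /* Hypothetical predicate restating vfio_dma_unmap()'s choice: */
    static bool vfio_unmap_uses_container_bitmap(VFIOContainer *container)
    {
        /* Per-device DMA logging wins when every device supports it;
         * the type1 unmap+bitmap ioctl is the fallback. */
        return container->dirty_pages_supported &&
               !vfio_devices_all_device_dirty_tracking(container);
    }
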
@@ -680,6 +784,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
iotlb->target_as->name ? iotlb->target_as->name : "none");
+ vfio_set_migration_error(-EINVAL);
return;
}
@@ -703,17 +808,18 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
read_only);
if (ret) {
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ "0x%"HWADDR_PRIx", %p) = %d (%s)",
container, iova,
- iotlb->addr_mask + 1, vaddr, ret);
+ iotlb->addr_mask + 1, vaddr, ret, strerror(-ret));
}
} else {
ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
if (ret) {
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
+ "0x%"HWADDR_PRIx") = %d (%s)",
container, iova,
- iotlb->addr_mask + 1, ret);
+ iotlb->addr_mask + 1, ret, strerror(-ret));
+ vfio_set_migration_error(ret);
}
}
out:
@@ -868,6 +974,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
g_free(vrdl);
}
+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
+ hwaddr iova, hwaddr end)
+{
+ VFIOHostDMAWindow *hostwin;
+ bool hostwin_found = false;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+ hostwin_found = true;
+ break;
+ }
+ }
+
+ return hostwin_found ? hostwin : NULL;
+}
+
static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
{
MemoryRegion *mr = section->mr;
@@ -884,24 +1006,15 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
return true;
}
-static void vfio_listener_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
+static bool vfio_listener_valid_section(MemoryRegionSection *section,
+ const char *name)
{
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- hwaddr iova, end;
- Int128 llend, llsize;
- void *vaddr;
- int ret;
- VFIOHostDMAWindow *hostwin;
- bool hostwin_found;
- Error *err = NULL;
-
if (vfio_listener_skipped_section(section)) {
- trace_vfio_listener_region_add_skip(
+ trace_vfio_listener_region_skip(name,
section->offset_within_address_space,
section->offset_within_address_space +
int128_get64(int128_sub(section->size, int128_one())));
- return;
+ return false;
}
if (unlikely((section->offset_within_address_space &
@@ -916,15 +1029,53 @@ static void vfio_listener_region_add(MemoryListener *listener,
section->offset_within_region,
qemu_real_host_page_size());
}
- return;
+ return false;
}
+ return true;
+}
+
+static bool vfio_get_section_iova_range(VFIOContainer *container,
+ MemoryRegionSection *section,
+ hwaddr *out_iova, hwaddr *out_end,
+ Int128 *out_llend)
+{
+ Int128 llend;
+ hwaddr iova;
+
iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
llend = int128_make64(section->offset_within_address_space);
llend = int128_add(llend, section->size);
llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask()));
if (int128_ge(int128_make64(iova), llend)) {
+ return false;
+ }
+
+ *out_iova = iova;
+ *out_end = int128_get64(int128_sub(llend, int128_one()));
+ if (out_llend) {
+ *out_llend = llend;
+ }
+ return true;
+}
+
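
A worked example of the page rounding this helper performs, assuming 4 KiB host pages (the addresses are invented for illustration):

    /*
     * offset_within_address_space = 0x1234, section size = 0x3000:
     *   iova  = REAL_HOST_PAGE_ALIGN(0x1234)  = 0x2000  (aligned up)
     *   llend = 0x4234 & page mask            = 0x4000  (aligned down)
     *   *out_end = llend - 1                  = 0x3fff
     * A sub-page section (say size 0x100) aligns to iova >= llend and the
     * helper returns false: there is nothing page-sized left to map.
     */
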
+static void vfio_listener_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ hwaddr iova, end;
+ Int128 llend, llsize;
+ void *vaddr;
+ int ret;
+ VFIOHostDMAWindow *hostwin;
+ Error *err = NULL;
+
+ if (!vfio_listener_valid_section(section, "region_add")) {
+ return;
+ }
+
+ if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
if (memory_region_is_ram_device(section->mr)) {
trace_vfio_listener_region_add_no_dma_map(
memory_region_name(section->mr),
@@ -934,7 +1085,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
return;
}
- end = int128_get64(int128_sub(llend, int128_one()));
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
hwaddr pgsize = 0;
@@ -994,15 +1144,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
#endif
}
- hostwin_found = false;
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- hostwin_found = true;
- break;
- }
- }
-
- if (!hostwin_found) {
+ hostwin = vfio_find_hostwin(container, iova, end);
+ if (!hostwin) {
error_setg(&err, "Container %p can't map guest IOVA region"
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
goto fail;
@@ -1095,8 +1238,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
vaddr, section->readonly);
if (ret) {
error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
+ "0x%"HWADDR_PRIx", %p) = %d (%s)",
+ container, iova, int128_get64(llsize), vaddr, ret,
+ strerror(-ret));
if (memory_region_is_ram_device(section->mr)) {
/* Allow unexpected mappings not to be fatal for RAM devices */
error_report_err(err);
@@ -1140,26 +1284,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
int ret;
bool try_unmap = true;
- if (vfio_listener_skipped_section(section)) {
- trace_vfio_listener_region_del_skip(
- section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(int128_sub(section->size, int128_one())));
- return;
- }
-
- if (unlikely((section->offset_within_address_space &
- ~qemu_real_host_page_mask()) !=
- (section->offset_within_region & ~qemu_real_host_page_mask()))) {
- if (!vfio_known_safe_misalignment(section)) {
- error_report("%s received unaligned region %s iova=0x%"PRIx64
- " offset_within_region=0x%"PRIx64
- " qemu_real_host_page_size=0x%"PRIxPTR,
- __func__, memory_region_name(section->mr),
- section->offset_within_address_space,
- section->offset_within_region,
- qemu_real_host_page_size());
- }
+ if (!vfio_listener_valid_section(section, "region_del")) {
return;
}
@@ -1186,15 +1311,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
*/
}
- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
- llend = int128_make64(section->offset_within_address_space);
- llend = int128_add(llend, section->size);
- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask()));
-
- if (int128_ge(int128_make64(iova), llend)) {
+ if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
return;
}
- end = int128_get64(int128_sub(llend, int128_one()));
llsize = int128_sub(llend, int128_make64(iova));
@@ -1203,15 +1322,9 @@ static void vfio_listener_region_del(MemoryListener *listener,
if (memory_region_is_ram_device(section->mr)) {
hwaddr pgmask;
VFIOHostDMAWindow *hostwin;
- bool hostwin_found = false;
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- hostwin_found = true;
- break;
- }
- }
- assert(hostwin_found); /* or region_add() would have failed */
+ hostwin = vfio_find_hostwin(container, iova, end);
+ assert(hostwin); /* or region_add() would have failed */
pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
@@ -1228,16 +1341,18 @@ static void vfio_listener_region_del(MemoryListener *listener,
ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
if (ret) {
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ container, iova, int128_get64(llsize), ret,
+ strerror(-ret));
}
iova += int128_get64(llsize);
}
ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
if (ret) {
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova, int128_get64(llsize), ret);
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ container, iova, int128_get64(llsize), ret,
+ strerror(-ret));
}
}
@@ -1256,7 +1371,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
}
-static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
+static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
{
int ret;
struct vfio_iommu_type1_dirty_bitmap dirty = {
@@ -1264,7 +1379,7 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
};
if (!container->dirty_pages_supported) {
- return;
+ return 0;
}
if (start) {
@@ -1275,40 +1390,327 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
if (ret) {
+ ret = -errno;
error_report("Failed to set dirty tracking flag 0x%x errno: %d",
dirty.flags, errno);
}
+
+ return ret;
+}
+
+typedef struct VFIODirtyRanges {
+ hwaddr min32;
+ hwaddr max32;
+ hwaddr min64;
+ hwaddr max64;
+} VFIODirtyRanges;
+
+typedef struct VFIODirtyRangesListener {
+ VFIOContainer *container;
+ VFIODirtyRanges ranges;
+ MemoryListener listener;
+} VFIODirtyRangesListener;
+
+static void vfio_dirty_tracking_update(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIODirtyRangesListener *dirty = container_of(listener,
+ VFIODirtyRangesListener,
+ listener);
+ VFIODirtyRanges *range = &dirty->ranges;
+ hwaddr iova, end, *min, *max;
+
+ if (!vfio_listener_valid_section(section, "tracking_update") ||
+ !vfio_get_section_iova_range(dirty->container, section,
+ &iova, &end, NULL)) {
+ return;
+ }
+
+ /*
+ * The address space passed to the dirty tracker is reduced to two ranges:
+ * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
+ * The underlying dirty page reports will query a sub-interval of each of
+ * these ranges.
+ *
+ * The dual range handling copes with known cases of big
+ * holes in the address space, like the x86 AMD 1T hole. The alternative
+ * would be an IOVATree but that has a much bigger runtime overhead and
+ * unnecessary complexity.
+ */
+ min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
+ max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
+
+ if (*min > iova) {
+ *min = iova;
+ }
+ if (*max < end) {
+ *max = end;
+ }
+
+ trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
+ return;
+}
+
+static const MemoryListener vfio_dirty_tracking_listener = {
+ .name = "vfio-tracking",
+ .region_add = vfio_dirty_tracking_update,
+};
+
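
As a concrete illustration of the two-bucket reduction described in the comment above, take a hypothetical guest with 2 GiB of RAM at address 0 and a 4 GiB bank parked above 1 TiB (the layout is invented):

    /*
     * section A: [0x0,           0x7fffffff]    -> 32-bit bucket
     * section B: [0x10000000000, 0x100ffffffff] -> 64-bit bucket
     *
     * After both region_add callbacks:
     *   min32 = 0x0            max32 = 0x7fffffff
     *   min64 = 0x10000000000  max64 = 0x100ffffffff
     *
     * Two compact intervals, rather than one range spanning the huge,
     * never-mapped hole between them.
     */
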
+static void vfio_dirty_tracking_init(VFIOContainer *container,
+ VFIODirtyRanges *ranges)
+{
+ VFIODirtyRangesListener dirty;
+
+ memset(&dirty, 0, sizeof(dirty));
+ dirty.ranges.min32 = UINT32_MAX;
+ dirty.ranges.min64 = UINT64_MAX;
+ dirty.listener = vfio_dirty_tracking_listener;
+ dirty.container = container;
+
+ memory_listener_register(&dirty.listener,
+ container->space->as);
+
+ *ranges = dirty.ranges;
+
+ /*
+ * The memory listener is synchronous, and is only used to calculate the
+ * ranges for dirty tracking. Unregister it after we are done, as we are not
+ * interested in any follow-up updates.
+ */
+ memory_listener_unregister(&dirty.listener);
+}
+
+static void vfio_devices_dma_logging_stop(VFIOContainer *container)
+{
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ VFIODevice *vbasedev;
+ VFIOGroup *group;
+
+ feature->argsz = sizeof(buf);
+ feature->flags = VFIO_DEVICE_FEATURE_SET |
+ VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
+
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (!vbasedev->dirty_tracking) {
+ continue;
+ }
+
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ warn_report("%s: Failed to stop DMA logging, err %d (%s)",
+ vbasedev->name, -errno, strerror(errno));
+ }
+ vbasedev->dirty_tracking = false;
+ }
+ }
+}
+
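
One detail worth spelling out is the uint64_t backing array used for the feature buffer. In the VFIO uAPI, struct vfio_device_feature ends in a flexible __u8 data[] member at offset 8, so carving the storage out of uint64_t words keeps feature->data 8-byte aligned for the __u64-laden payload structs overlaid on it. A comment-sized sketch of the layout (my paraphrase of linux/vfio.h, not patch code):

    /*
     *   struct vfio_device_feature { __u32 argsz; __u32 flags; __u8 data[]; };
     *
     *   uint64_t buf:  [ argsz | flags ][ data[] ... payload ... ]
     *                  ^ 8-byte aligned ^ offset 8, also 8-byte aligned
     */
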
+static struct vfio_device_feature *
+vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
+ VFIODirtyRanges *tracking)
+{
+ struct vfio_device_feature *feature;
+ size_t feature_size;
+ struct vfio_device_feature_dma_logging_control *control;
+ struct vfio_device_feature_dma_logging_range *ranges;
+
+ feature_size = sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_dma_logging_control);
+ feature = g_try_malloc0(feature_size);
+ if (!feature) {
+ errno = ENOMEM;
+ return NULL;
+ }
+ feature->argsz = feature_size;
+ feature->flags = VFIO_DEVICE_FEATURE_SET |
+ VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
+
+ control = (struct vfio_device_feature_dma_logging_control *)feature->data;
+ control->page_size = qemu_real_host_page_size();
+
+ /*
+ * DMA logging uAPI guarantees to support at least a number of ranges that
+ * fits into a single host kernel base page.
+ */
+ control->num_ranges = !!tracking->max32 + !!tracking->max64;
+ ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
+ control->num_ranges);
+ if (!ranges) {
+ g_free(feature);
+ errno = ENOMEM;
+
+ return NULL;
+ }
+
+ control->ranges = (__u64)(uintptr_t)ranges;
+ if (tracking->max32) {
+ ranges->iova = tracking->min32;
+ ranges->length = (tracking->max32 - tracking->min32) + 1;
+ ranges++;
+ }
+ if (tracking->max64) {
+ ranges->iova = tracking->min64;
+ ranges->length = (tracking->max64 - tracking->min64) + 1;
+ }
+
+ trace_vfio_device_dirty_tracking_start(control->num_ranges,
+ tracking->min32, tracking->max32,
+ tracking->min64, tracking->max64);
+
+ return feature;
+}
+
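
To put the "fits into a single host kernel base page" guarantee above into numbers (assuming a 4 KiB base page):

    /*
     * sizeof(struct vfio_device_feature_dma_logging_range) is 16 bytes
     * (__u64 iova + __u64 length), so one 4 KiB page holds 256 ranges --
     * far more than the at-most-two ranges this patch ever requests.
     */
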
+static void vfio_device_feature_dma_logging_start_destroy(
+ struct vfio_device_feature *feature)
+{
+ struct vfio_device_feature_dma_logging_control *control =
+ (struct vfio_device_feature_dma_logging_control *)feature->data;
+ struct vfio_device_feature_dma_logging_range *ranges =
+ (struct vfio_device_feature_dma_logging_range *)(uintptr_t)control->ranges;
+
+ g_free(ranges);
+ g_free(feature);
+}
+
+static int vfio_devices_dma_logging_start(VFIOContainer *container)
+{
+ struct vfio_device_feature *feature;
+ VFIODirtyRanges ranges;
+ VFIODevice *vbasedev;
+ VFIOGroup *group;
+ int ret = 0;
+
+ vfio_dirty_tracking_init(container, &ranges);
+ feature = vfio_device_feature_dma_logging_start_create(container,
+ &ranges);
+ if (!feature) {
+ return -errno;
+ }
+
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (vbasedev->dirty_tracking) {
+ continue;
+ }
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
+ if (ret) {
+ ret = -errno;
+ error_report("%s: Failed to start DMA logging, err %d (%s)",
+ vbasedev->name, ret, strerror(errno));
+ goto out;
+ }
+ vbasedev->dirty_tracking = true;
+ }
+ }
+
+out:
+ if (ret) {
+ vfio_devices_dma_logging_stop(container);
+ }
+
+ vfio_device_feature_dma_logging_start_destroy(feature);
+
+ return ret;
}
static void vfio_listener_log_global_start(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ int ret;
+
+ if (vfio_devices_all_device_dirty_tracking(container)) {
+ ret = vfio_devices_dma_logging_start(container);
+ } else {
+ ret = vfio_set_dirty_page_tracking(container, true);
+ }
- vfio_set_dirty_page_tracking(container, true);
+ if (ret) {
+ error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
+ ret, strerror(-ret));
+ vfio_set_migration_error(ret);
+ }
}
static void vfio_listener_log_global_stop(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ int ret = 0;
+
+ if (vfio_devices_all_device_dirty_tracking(container)) {
+ vfio_devices_dma_logging_stop(container);
+ } else {
+ ret = vfio_set_dirty_page_tracking(container, false);
+ }
- vfio_set_dirty_page_tracking(container, false);
+ if (ret) {
+ error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
+ ret, strerror(-ret));
+ vfio_set_migration_error(ret);
+ }
}
-static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
- uint64_t size, ram_addr_t ram_addr)
+static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
+ hwaddr size, void *bitmap)
{
- struct vfio_iommu_type1_dirty_bitmap *dbitmap;
- struct vfio_iommu_type1_dirty_bitmap_get *range;
- uint64_t pages;
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_dma_logging_report),
+ sizeof(__u64))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_dma_logging_report *report =
+ (struct vfio_device_feature_dma_logging_report *)feature->data;
+
+ report->iova = iova;
+ report->length = size;
+ report->page_size = qemu_real_host_page_size();
+ report->bitmap = (__u64)(uintptr_t)bitmap;
+
+ feature->argsz = sizeof(buf);
+ feature->flags = VFIO_DEVICE_FEATURE_GET |
+ VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
+
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size)
+{
+ VFIODevice *vbasedev;
+ VFIOGroup *group;
int ret;
- if (!container->dirty_pages_supported) {
- cpu_physical_memory_set_dirty_range(ram_addr, size,
- tcg_enabled() ? DIRTY_CLIENTS_ALL :
- DIRTY_CLIENTS_NOCODE);
- return 0;
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ ret = vfio_device_dma_logging_report(vbasedev, iova, size,
+ vbmap->bitmap);
+ if (ret) {
+ error_report("%s: Failed to get DMA logging report, iova: "
+ "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
+ ", err: %d (%s)",
+ vbasedev->name, iova, size, ret, strerror(-ret));
+
+ return ret;
+ }
+ }
}
+ return 0;
+}
+
+static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
+ hwaddr iova, hwaddr size)
+{
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
+ int ret;
+
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1323,36 +1725,63 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
* to qemu_real_host_page_size.
*/
range->bitmap.pgsize = qemu_real_host_page_size();
-
- pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size();
- range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
- range->bitmap.data = g_try_malloc0(range->bitmap.size);
- if (!range->bitmap.data) {
- ret = -ENOMEM;
- goto err_out;
- }
+ range->bitmap.size = vbmap->size;
+ range->bitmap.data = (__u64 *)vbmap->bitmap;
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
if (ret) {
+ ret = -errno;
error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
" size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
(uint64_t)range->size, errno);
- goto err_out;
}
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
- ram_addr, pages);
-
- trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
- range->bitmap.size, ram_addr);
-err_out:
- g_free(range->bitmap.data);
g_free(dbitmap);
return ret;
}
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+ uint64_t size, ram_addr_t ram_addr)
+{
+ bool all_device_dirty_tracking =
+ vfio_devices_all_device_dirty_tracking(container);
+ VFIOBitmap vbmap;
+ int ret;
+
+ if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
+ cpu_physical_memory_set_dirty_range(ram_addr, size,
+ tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_NOCODE);
+ return 0;
+ }
+
+ ret = vfio_bitmap_alloc(&vbmap, size);
+ if (ret) {
+ return ret;
+ }
+
+ if (all_device_dirty_tracking) {
+ ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
+ } else {
+ ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
+ vbmap.pages);
+
+ trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size,
+ ram_addr);
+out:
+ g_free(vbmap.bitmap);
+
+ return ret;
+}
+
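
Pulling the pieces together, vfio_get_dirty_bitmap() now picks its bitmap source as follows (a summary of the code above, not additional patch content):

    /*
     * all devices support DMA logging -> vfio_devices_query_dirty_bitmap()
     * container tracks dirty pages    -> vfio_query_dirty_bitmap() ioctl path
     * neither                         -> cpu_physical_memory_set_dirty_range()
     *                                    (conservatively mark it all dirty)
     */
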
typedef struct {
IOMMUNotifier n;
VFIOGuestIOMMU *giommu;
@@ -1366,29 +1795,33 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOContainer *container = giommu->container;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
ram_addr_t translated_addr;
+ int ret = -EINVAL;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
iotlb->target_as->name ? iotlb->target_as->name : "none");
- return;
+ goto out;
}
rcu_read_lock();
if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
- int ret;
-
ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
translated_addr);
if (ret) {
error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%m)",
- container, iova,
- iotlb->addr_mask + 1, ret);
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ container, iova, iotlb->addr_mask + 1, ret,
+ strerror(-ret));
}
}
rcu_read_unlock();
+
+out:
+ if (ret) {
+ vfio_set_migration_error(ret);
+ }
}
static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
@@ -1481,13 +1914,19 @@ static void vfio_listener_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ int ret;
if (vfio_listener_skipped_section(section)) {
return;
}
if (vfio_devices_all_dirty_tracking(container)) {
- vfio_sync_dirty_bitmap(container, section);
+ ret = vfio_sync_dirty_bitmap(container, section);
+ if (ret) {
+ error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
+ strerror(-ret));
+ vfio_set_migration_error(ret);
+ }
}
}