Diffstat (limited to 'hw/vfio/migration.c')
-rw-r--r-- | hw/vfio/migration.c | 305 |
1 file changed, 261 insertions, 44 deletions
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 6b58dddb88..1db7d52ab2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -18,6 +18,8 @@
 #include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
 #include "migration/migration.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
 #include "migration/vmstate.h"
 #include "migration/qemu-file.h"
 #include "migration/register.h"
@@ -45,6 +47,7 @@
 #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
 #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
 #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
 
 /*
  * This is an arbitrary size based on migration of mlx5 devices, where typically
@@ -68,6 +71,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
         return "STOP_COPY";
     case VFIO_DEVICE_STATE_RESUMING:
         return "RESUMING";
+    case VFIO_DEVICE_STATE_PRE_COPY:
+        return "PRE_COPY";
     default:
         return "UNKNOWN STATE";
     }
@@ -241,18 +246,45 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
     return 0;
 }
 
-/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */
-static int vfio_save_block(QEMUFile *f, VFIOMigration *migration)
+static int vfio_query_precopy_size(VFIOMigration *migration)
+{
+    struct vfio_precopy_info precopy = {
+        .argsz = sizeof(precopy),
+    };
+
+    migration->precopy_init_size = 0;
+    migration->precopy_dirty_size = 0;
+
+    if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) {
+        return -errno;
+    }
+
+    migration->precopy_init_size = precopy.initial_bytes;
+    migration->precopy_dirty_size = precopy.dirty_bytes;
+
+    return 0;
+}
+
+/* Returns the size of saved data on success and -errno on error */
+static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
 {
     ssize_t data_size;
 
     data_size = read(migration->data_fd, migration->data_buffer,
                      migration->data_buffer_size);
     if (data_size < 0) {
+        /*
+         * Pre-copy emptied all the device state for now. For more information,
+         * please refer to the Linux kernel VFIO uAPI.
+         */
+        if (errno == ENOMSG) {
+            return 0;
+        }
+
         return -errno;
     }
     if (data_size == 0) {
-        return 1;
+        return 0;
     }
 
     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
@@ -262,7 +294,39 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration)
 
     trace_vfio_save_block(migration->vbasedev->name, data_size);
 
-    return qemu_file_get_error(f);
+    return qemu_file_get_error(f) ?: data_size;
+}
+
+static void vfio_update_estimated_pending_data(VFIOMigration *migration,
+                                               uint64_t data_size)
+{
+    if (!data_size) {
+        /*
+         * Pre-copy emptied all the device state for now, update estimated sizes
+         * accordingly.
+         */
+        migration->precopy_init_size = 0;
+        migration->precopy_dirty_size = 0;
+
+        return;
+    }
+
+    if (migration->precopy_init_size) {
+        uint64_t init_size = MIN(migration->precopy_init_size, data_size);
+
+        migration->precopy_init_size -= init_size;
+        data_size -= init_size;
+    }
+
+    migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size,
+                                         data_size);
+}
+
+static bool vfio_precopy_supported(VFIODevice *vbasedev)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->mig_flags & VFIO_MIGRATION_PRE_COPY;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -285,6 +349,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
         return -ENOMEM;
     }
 
+    if (vfio_precopy_supported(vbasedev)) {
+        int ret;
+
+        switch (migration->device_state) {
+        case VFIO_DEVICE_STATE_RUNNING:
+            ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY,
+                                           VFIO_DEVICE_STATE_RUNNING);
+            if (ret) {
+                return ret;
+            }
+
+            vfio_query_precopy_size(migration);
+
+            break;
+        case VFIO_DEVICE_STATE_STOP:
+            /* vfio_save_complete_precopy() will go to STOP_COPY */
+            break;
+        default:
+            return -EINVAL;
+        }
+    }
+
     trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size);
 
     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -299,26 +385,43 @@ static void vfio_save_cleanup(void *opaque)
 
     g_free(migration->data_buffer);
     migration->data_buffer = NULL;
+    migration->precopy_init_size = 0;
+    migration->precopy_dirty_size = 0;
+    migration->initial_data_sent = false;
     vfio_migration_cleanup(vbasedev);
     trace_vfio_save_cleanup(vbasedev->name);
 }
 
+static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy,
+                                        uint64_t *can_postcopy)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) {
+        return;
+    }
+
+    *must_precopy +=
+        migration->precopy_init_size + migration->precopy_dirty_size;
+
+    trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy,
+                                      *can_postcopy,
+                                      migration->precopy_init_size,
+                                      migration->precopy_dirty_size);
+}
+
 /*
  * Migration size of VFIO devices can be as little as a few KBs or as big as
  * many GBs. This value should be big enough to cover the worst case.
  */
 #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
 
-/*
- * Only exact function is implemented and not estimate function. The reason is
- * that during pre-copy phase of migration the estimate function is called
- * repeatedly while pending RAM size is over the threshold, thus migration
- * can't converge and querying the VFIO device pending data size is useless.
- */
 static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
                                      uint64_t *can_postcopy)
 {
     VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
     uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
 
     /*
@@ -328,16 +431,64 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
     vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
     *must_precopy += stop_copy_size;
 
+    if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
+        vfio_query_precopy_size(migration);
+
+        *must_precopy +=
+            migration->precopy_init_size + migration->precopy_dirty_size;
+    }
+
     trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
-                                   stop_copy_size);
+                                   stop_copy_size, migration->precopy_init_size,
+                                   migration->precopy_dirty_size);
+}
+
+static bool vfio_is_active_iterate(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
+}
+
+static int vfio_save_iterate(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    ssize_t data_size;
+
+    data_size = vfio_save_block(f, migration);
+    if (data_size < 0) {
+        return data_size;
+    }
+
+    vfio_update_estimated_pending_data(migration, data_size);
+
+    if (migrate_switchover_ack() && !migration->precopy_init_size &&
+        !migration->initial_data_sent) {
+        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
+        migration->initial_data_sent = true;
+    } else {
+        qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+    }
+
+    trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
+                            migration->precopy_dirty_size);
+
+    /*
+     * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero.
+     * Return 1 so following handlers will not be potentially blocked.
+     */
+    return 1;
 }
 
 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
+    ssize_t data_size;
     int ret;
 
-    /* We reach here with device state STOP only */
+    /* We reach here with device state STOP or STOP_COPY only */
     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                    VFIO_DEVICE_STATE_STOP);
     if (ret) {
@@ -345,11 +496,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     }
 
     do {
-        ret = vfio_save_block(f, vbasedev->migration);
-        if (ret < 0) {
-            return ret;
+        data_size = vfio_save_block(f, vbasedev->migration);
+        if (data_size < 0) {
+            return data_size;
         }
-    } while (!ret);
+    } while (data_size);
 
     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
     ret = qemu_file_get_error(f);
@@ -439,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
             }
             break;
         }
+        case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
+        {
+            if (!vfio_precopy_supported(vbasedev) ||
+                !migrate_switchover_ack()) {
+                error_report("%s: Received INIT_DATA_SENT but switchover ack "
+                             "is not used", vbasedev->name);
+                return -EINVAL;
+            }
+
+            ret = qemu_loadvm_approve_switchover();
+            if (ret) {
+                error_report(
+                    "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)",
+                    vbasedev->name, ret, strerror(-ret));
+            }
+
+            return ret;
+        }
         default:
             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
             return -EINVAL;
@@ -453,15 +622,26 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
     return ret;
 }
 
+static bool vfio_switchover_ack_needed(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+
+    return vfio_precopy_supported(vbasedev);
+}
+
 static const SaveVMHandlers savevm_vfio_handlers = {
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
+    .state_pending_estimate = vfio_state_pending_estimate,
     .state_pending_exact = vfio_state_pending_exact,
+    .is_active_iterate = vfio_is_active_iterate,
+    .save_live_iterate = vfio_save_iterate,
     .save_live_complete_precopy = vfio_save_complete_precopy,
     .save_state = vfio_save_state,
     .load_setup = vfio_load_setup,
     .load_cleanup = vfio_load_cleanup,
     .load_state = vfio_load_state,
+    .switchover_ack_needed = vfio_switchover_ack_needed,
 };
 
 /* ---------------------------------------------------------------------- */
@@ -469,13 +649,18 @@ static const SaveVMHandlers savevm_vfio_handlers = {
 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
 {
     VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
     enum vfio_device_mig_state new_state;
     int ret;
 
     if (running) {
         new_state = VFIO_DEVICE_STATE_RUNNING;
     } else {
-        new_state = VFIO_DEVICE_STATE_STOP;
+        new_state =
+            (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY &&
+             (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ?
+                VFIO_DEVICE_STATE_STOP_COPY :
+                VFIO_DEVICE_STATE_STOP;
     }
 
     /*
@@ -512,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
     case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_FAILED:
-        bytes_transferred = 0;
         /*
         * If setting the device in RUNNING state fails, the device should
         * be reset. To do so, use ERROR state as a recover state.
@@ -540,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags)
     feature->argsz = sizeof(buf);
     feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;
     if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
-        if (errno == ENOTTY) {
-            error_report("%s: VFIO migration is not supported in kernel",
-                         vbasedev->name);
-        } else {
-            error_report("%s: Failed to query VFIO migration support, err: %s",
-                         vbasedev->name, strerror(errno));
-        }
-
         return -errno;
     }
 
@@ -602,6 +778,7 @@ static int vfio_migration_init(VFIODevice *vbasedev)
     migration->vbasedev = vbasedev;
     migration->device_state = VFIO_DEVICE_STATE_RUNNING;
     migration->data_fd = -1;
+    migration->mig_flags = mig_flags;
 
     vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev);
 
@@ -625,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev)
     return 0;
 }
 
+static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
+{
+    int ret;
+
+    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
+        error_propagate(errp, err);
+        return -EINVAL;
+    }
+
+    vbasedev->migration_blocker = error_copy(err);
+    error_free(err);
+
+    ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
+    if (ret < 0) {
+        error_free(vbasedev->migration_blocker);
+        vbasedev->migration_blocker = NULL;
+    }
+
+    return ret;
+}
+
 /* ---------------------------------------------------------------------- */
 
 int64_t vfio_mig_bytes_transferred(void)
@@ -632,42 +830,61 @@ int64_t vfio_mig_bytes_transferred(void)
     return bytes_transferred;
 }
 
+void vfio_reset_bytes_transferred(void)
+{
+    bytes_transferred = 0;
+}
+
 int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
 {
-    int ret = -ENOTSUP;
+    Error *err = NULL;
+    int ret;
 
-    if (!vbasedev->enable_migration) {
-        goto add_blocker;
+    if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) {
+        error_setg(&err, "%s: Migration is disabled for VFIO device",
+                   vbasedev->name);
+        return vfio_block_migration(vbasedev, err, errp);
     }
 
     ret = vfio_migration_init(vbasedev);
     if (ret) {
-        goto add_blocker;
+        if (ret == -ENOTTY) {
+            error_setg(&err, "%s: VFIO migration is not supported in kernel",
+                       vbasedev->name);
+        } else {
+            error_setg(&err,
+                       "%s: Migration couldn't be initialized for VFIO device, "
+                       "err: %d (%s)",
+                       vbasedev->name, ret, strerror(-ret));
+        }
+
+        return vfio_block_migration(vbasedev, err, errp);
+    }
+
+    if (!vbasedev->dirty_pages_supported) {
+        if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
+            error_setg(&err,
+                       "%s: VFIO device doesn't support device dirty tracking",
+                       vbasedev->name);
+            return vfio_block_migration(vbasedev, err, errp);
+        }
+
+        warn_report("%s: VFIO device doesn't support device dirty tracking",
+                    vbasedev->name);
     }
 
-    ret = vfio_block_multiple_devices_migration(errp);
+    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
     if (ret) {
         return ret;
     }
 
-    ret = vfio_block_giommu_migration(errp);
+    ret = vfio_block_giommu_migration(vbasedev, errp);
     if (ret) {
         return ret;
     }
 
-    trace_vfio_migration_probe(vbasedev->name);
+    trace_vfio_migration_realize(vbasedev->name);
     return 0;
-
-add_blocker:
-    error_setg(&vbasedev->migration_blocker,
-               "VFIO device doesn't support migration");
-
-    ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
-    if (ret < 0) {
-        error_free(vbasedev->migration_blocker);
-        vbasedev->migration_blocker = NULL;
-    }
-    return ret;
 }
 
 void vfio_migration_exit(VFIODevice *vbasedev)