44 files changed, 981 insertions, 297 deletions
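The user-visible change in this series is the new zero-page-detection migration parameter (QAPI enum ZeroPageDetection with values none, legacy and multifd; the default is multifd, and hw_compat_8_2 pins pre-9.0 machine types to legacy). A rough usage sketch follows; the monitor setup is assumed, and only the parameter and value names are taken from the patches below:

    # QMP: choose where zero-page scanning runs for subsequent migrations
    { "execute": "migrate-set-parameters",
      "arguments": { "zero-page-detection": "multifd" } }

    # HMP equivalent
    (qemu) migrate_set_parameter zero-page-detection legacy

With multifd, zero-page detection moves from the main migration thread into the multifd sender threads; legacy keeps the pre-9.0 behavior of checking in the main migration thread, and none disables the check entirely.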
diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst
index 8024275d6d..54385a23e5 100644
--- a/docs/devel/migration/main.rst
+++ b/docs/devel/migration/main.rst
@@ -44,7 +44,8 @@ over any transport.
 - file migration: do the migration using a file that is passed to QEMU
   by path. A file offset option is supported to allow a management
   application to add its own metadata to the start of the file without
-  QEMU interference.
+  QEMU interference. Note that QEMU does not flush cached file
+  data/metadata at the end of migration.
 
 In addition, support is included for migration using RDMA, which
 transports the page data using ``RDMA``, where the hardware takes care of
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9ac5d5389a..0e9d646b61 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -32,7 +32,9 @@
 #include "hw/virtio/virtio-net.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_8_2[] = {};
+GlobalProperty hw_compat_8_2[] = {
+    { "migration", "zero-page-detection", "legacy"},
+};
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
 GlobalProperty hw_compat_8_1[] = {
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index b45e90edb2..7eca2f2377 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -693,6 +693,16 @@ const PropertyInfo qdev_prop_granule_mode = {
     .set_default_value = qdev_propinfo_set_default_value_enum,
 };
 
+const PropertyInfo qdev_prop_zero_page_detection = {
+    .name = "ZeroPageDetection",
+    .description = "zero_page_detection values, "
+                   "none,legacy,multifd",
+    .enum_table = &ZeroPageDetection_lookup,
+    .get = qdev_propinfo_get_enum,
+    .set = qdev_propinfo_set_enum,
+    .set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
 /* --- Reserved Region --- */
 
 /*
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ff88c3f31c..011ceaab89 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -39,7 +39,6 @@
 #include "sysemu/runstate.h"
 #include "trace.h"
 #include "qapi/error.h"
-#include "migration/migration.h"
 #include "migration/misc.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
@@ -150,14 +149,8 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
 
 static void vfio_set_migration_error(int err)
 {
-    MigrationState *ms = migrate_get_current();
-
-    if (migration_is_setup_or_active(ms->state)) {
-        WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
-            if (ms->to_dst_file) {
-                qemu_file_set_error(ms->to_dst_file, err);
-            }
-        }
+    if (migration_is_setup_or_active()) {
+        migration_file_set_error(err);
     }
 }
 
@@ -180,10 +173,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
 static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
 {
     VFIODevice *vbasedev;
-    MigrationState *ms = migrate_get_current();
 
-    if (ms->state != MIGRATION_STATUS_ACTIVE &&
-        ms->state != MIGRATION_STATUS_DEVICE) {
+    if (!migration_is_active() && !migration_is_device()) {
         return false;
     }
 
@@ -225,7 +216,7 @@ vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer)
 {
     VFIODevice *vbasedev;
 
-    if (!migration_is_active(migrate_get_current())) {
+    if (!migration_is_active()) {
         return false;
     }
 
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 096d77eac3..9a775e4efc 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -32,7 +32,6 @@
 #include "sysemu/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
-#include "migration/migration.h"
 #include "pci.h"
 
 VFIOGroupList vfio_group_list =
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 2050ac8897..1149c6b374 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -17,14 +17,12 @@
 
 #include "sysemu/runstate.h"
 #include "hw/vfio/vfio-common.h"
-#include "migration/migration.h"
-#include "migration/options.h"
+#include "migration/misc.h"
 #include "migration/savevm.h"
 #include "migration/vmstate.h"
 #include "migration/qemu-file.h"
 #include "migration/register.h"
 #include "migration/blocker.h"
-#include "migration/misc.h"
 #include "qapi/error.h"
 #include "exec/ramlist.h"
 #include "exec/ram_addr.h"
@@ -505,6 +503,12 @@ static bool vfio_is_active_iterate(void *opaque)
     return vfio_device_state_is_precopy(vbasedev);
 }
 
+/*
+ * Note about migration rate limiting: VFIO migration buffer size is currently
+ * limited to 1MB, so there is no need to check if migration rate exceeded (as
+ * in the worst case it will exceed by 1MB). However, if the buffer size is
+ * later changed to a bigger value, migration rate should be enforced here.
+ */
 static int vfio_save_iterate(QEMUFile *f, void *opaque)
 {
     VFIODevice *vbasedev = opaque;
@@ -529,11 +533,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
     trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
                             migration->precopy_dirty_size);
 
-    /*
-     * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero.
-     * Return 1 so following handlers will not be potentially blocked.
-     */
-    return 1;
+    return !migration->precopy_init_size && !migration->precopy_dirty_size;
 }
 
 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
@@ -713,9 +713,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
          * Migration should be aborted in this case, but vm_state_notify()
          * currently does not support reporting failures.
          */
-        if (migrate_get_current()->to_dst_file) {
-            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
-        }
+        migration_file_set_error(ret);
     }
 
     trace_vfio_vmstate_change_prepare(vbasedev->name, running,
@@ -745,9 +743,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
          * Migration should be aborted in this case, but vm_state_notify()
         * currently does not support reporting failures.
          */
-        if (migrate_get_current()->to_dst_file) {
-            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
-        }
+        migration_file_set_error(ret);
     }
 
     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index a1eea8547e..1af8621481 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -26,7 +26,6 @@
 #include "qemu/sockets.h"
 #include "sysemu/runstate.h"
 #include "sysemu/cryptodev.h"
-#include "migration/migration.h"
 #include "migration/postcopy-ram.h"
 #include "trace.h"
 #include "exec/ramblock.h"
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 89f853fa9e..609e39a821 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -31,8 +31,6 @@
 #include "trace.h"
 #include "qemu/error-report.h"
 #include "migration/misc.h"
-#include "migration/migration.h"
-#include "migration/options.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
index 626be87dd3..438f65389f 100644
--- a/include/hw/qdev-properties-system.h
+++ b/include/hw/qdev-properties-system.h
@@ -9,6 +9,7 @@ extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_multifd_compression;
 extern const PropertyInfo qdev_prop_mig_mode;
 extern const PropertyInfo qdev_prop_granule_mode;
+extern const PropertyInfo qdev_prop_zero_page_detection;
 extern const PropertyInfo qdev_prop_losttickpolicy;
 extern const PropertyInfo qdev_prop_blockdev_on_error;
 extern const PropertyInfo qdev_prop_bios_chs_trans;
@@ -50,6 +51,9 @@ extern const PropertyInfo qdev_prop_iothread_vq_mapping_list;
                        MigMode)
 #define DEFINE_PROP_GRANULE_MODE(_n, _s, _f, _d) \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_granule_mode, GranuleMode)
+#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \
+                       ZeroPageDetection)
 #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
                        LostTickPolicy)
diff --git a/include/migration/client-options.h b/include/migration/client-options.h
new file mode 100644
index 0000000000..59f4b55cf4
--- /dev/null
+++ b/include/migration/client-options.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU public migration capabilities
+ *
+ * Copyright (c) 2012-2023 Red Hat Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H
+#define QEMU_MIGRATION_CLIENT_OPTIONS_H
+
+/* capabilities */
+
+bool migrate_background_snapshot(void);
+bool migrate_dirty_limit(void);
+bool migrate_postcopy_ram(void);
+bool migrate_switchover_ack(void);
+
+/* parameters */
+
+MigMode migrate_mode(void);
+uint64_t migrate_vcpu_dirty_limit_period(void);
+
+#endif
diff --git a/include/migration/misc.h b/include/migration/misc.h
index 5d1aa593ed..c9e200f4eb 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -17,6 +17,7 @@
 #include "qemu/notify.h"
 #include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-types-net.h"
+#include "migration/client-options.h"
 
 /* migration/ram.c */
 
@@ -59,8 +60,10 @@ void dump_vmstate_json_to_file(FILE *out_fp);
 void migration_object_init(void);
 void migration_shutdown(void);
 bool migration_is_idle(void);
-bool migration_is_active(MigrationState *);
-bool migrate_mode_is_cpr(MigrationState *);
+bool migration_is_active(void);
+bool migration_is_device(void);
+bool migration_thread_is_self(void);
+bool migration_is_setup_or_active(void);
 
 typedef enum MigrationEventType {
     MIG_EVENT_PRECOPY_SETUP,
@@ -99,16 +102,15 @@ void migration_add_notifier_mode(NotifierWithReturn *notify,
                                  MigrationNotifyFunc func, MigMode mode);
 
 void migration_remove_notifier(NotifierWithReturn *notify);
-int migration_call_notifiers(MigrationState *s, MigrationEventType type,
-                             Error **errp);
-bool migration_in_setup(MigrationState *);
-bool migration_has_finished(MigrationState *);
-bool migration_has_failed(MigrationState *);
-/* ...and after the device transmission */
+bool migration_is_running(void);
+void migration_file_set_error(int err);
+
 /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
 bool migration_in_incoming_postcopy(void);
+
 /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
 bool migration_incoming_postcopy_advised(void);
+
 /* True if background snapshot is active */
 bool migration_in_bg_snapshot(void);
 
diff --git a/include/migration/register.h b/include/migration/register.h
index 9ab1f79512..d7b70a8be6 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -16,30 +16,130 @@
 #include "hw/vmstate-if.h"
 
+/**
+ * struct SaveVMHandlers: handler structure to finely control
+ * migration of complex subsystems and devices, such as RAM, block and
+ * VFIO.
+ */
 typedef struct SaveVMHandlers {
-    /* This runs inside the BQL. */
-    SaveStateHandler *save_state;
-    /*
-     * save_prepare is called early, even before migration starts, and can be
-     * used to perform early checks.
+    /* The following handlers run inside the BQL. */
+
+    /**
+     * @save_state
+     *
+     * Saves state section on the source using the latest state format
+     * version.
+     *
+     * Legacy method. Should be deprecated when all users are ported
+     * to VMStateDescription.
+     *
+     * @f: QEMUFile where to send the data
+     * @opaque: data pointer passed to register_savevm_live()
+     */
+    void (*save_state)(QEMUFile *f, void *opaque);
+
+    /**
+     * @save_prepare
+     *
+     * Called early, even before migration starts, and can be used to
+     * perform early checks.
+     *
+     * @opaque: data pointer passed to register_savevm_live()
+     * @errp: pointer to Error*, to store an error if it happens.
+ * + * Returns zero to indicate success and negative for error */ int (*save_prepare)(void *opaque, Error **errp); + + /** + * @save_setup + * + * Initializes the data structures on the source and transmits + * first section containing information on the device + * + * @f: QEMUFile where to send the data + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*save_setup)(QEMUFile *f, void *opaque); + + /** + * @save_cleanup + * + * Uninitializes the data structures on the source + * + * @opaque: data pointer passed to register_savevm_live() + */ void (*save_cleanup)(void *opaque); + + /** + * @save_live_complete_postcopy + * + * Called at the end of postcopy for all postcopyable devices. + * + * @f: QEMUFile where to send the data + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); + + /** + * @save_live_complete_precopy + * + * Transmits the last section for the device containing any + * remaining data at the end of a precopy phase. When postcopy is + * enabled, devices that support postcopy will skip this step, + * where the final data will be flushed at the end of postcopy via + * @save_live_complete_postcopy instead. + * + * @f: QEMUFile where to send the data + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); /* This runs both outside and inside the BQL. */ + + /** + * @is_active + * + * Will skip a state section if not active + * + * @opaque: data pointer passed to register_savevm_live() + * + * Returns true if state section is active else false + */ bool (*is_active)(void *opaque); + + /** + * @has_postcopy + * + * Checks if a device supports postcopy + * + * @opaque: data pointer passed to register_savevm_live() + * + * Returns true for postcopy support else false + */ bool (*has_postcopy)(void *opaque); - /* is_active_iterate - * If it is not NULL then qemu_savevm_state_iterate will skip iteration if - * it returns false. For example, it is needed for only-postcopy-states, - * which needs to be handled by qemu_savevm_state_setup and - * qemu_savevm_state_pending, but do not need iterations until not in - * postcopy stage. + /** + * @is_active_iterate + * + * As #SaveVMHandlers.is_active(), will skip an inactive state + * section in qemu_savevm_state_iterate. + * + * For example, it is needed for only-postcopy-states, which needs + * to be handled by qemu_savevm_state_setup() and + * qemu_savevm_state_pending(), but do not need iterations until + * not in postcopy stage. + * + * @opaque: data pointer passed to register_savevm_live() + * + * Returns true if state section is active else false */ bool (*is_active_iterate)(void *opaque); @@ -48,44 +148,155 @@ typedef struct SaveVMHandlers { * use data that is local to the migration thread or protected * by other locks. */ + + /** + * @save_live_iterate + * + * Should send a chunk of data until the point that stream + * bandwidth limits tell it to stop. Each call generates one + * section. + * + * @f: QEMUFile where to send the data + * @opaque: data pointer passed to register_savevm_live() + * + * Returns 0 to indicate that there is still more data to send, + * 1 that there is no more data to send and + * negative to indicate an error. 
+ */ int (*save_live_iterate)(QEMUFile *f, void *opaque); /* This runs outside the BQL! */ - /* Note for save_live_pending: - * must_precopy: - * - must be migrated in precopy or in stopped state - * - i.e. must be migrated before target start - * - * can_postcopy: - * - can migrate in postcopy or in stopped state - * - i.e. can migrate after target start - * - some can also be migrated during precopy (RAM) - * - some must be migrated after source stops (block-dirty-bitmap) - * - * Sum of can_postcopy and must_postcopy is the whole amount of + + /** + * @state_pending_estimate + * + * This estimates the remaining data to transfer + * + * Sum of @can_postcopy and @must_postcopy is the whole amount of * pending data. + * + * @opaque: data pointer passed to register_savevm_live() + * @must_precopy: amount of data that must be migrated in precopy + * or in stopped state, i.e. that must be migrated + * before target start. + * @can_postcopy: amount of data that can be migrated in postcopy + * or in stopped state, i.e. after target start. + * Some can also be migrated during precopy (RAM). + * Some must be migrated after source stops + * (block-dirty-bitmap) */ - /* This estimates the remaining data to transfer */ void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy, uint64_t *can_postcopy); - /* This calculate the exact remaining data to transfer */ + + /** + * @state_pending_exact + * + * This calculates the exact remaining data to transfer + * + * Sum of @can_postcopy and @must_postcopy is the whole amount of + * pending data. + * + * @opaque: data pointer passed to register_savevm_live() + * @must_precopy: amount of data that must be migrated in precopy + * or in stopped state, i.e. that must be migrated + * before target start. + * @can_postcopy: amount of data that can be migrated in postcopy + * or in stopped state, i.e. after target start. + * Some can also be migrated during precopy (RAM). + * Some must be migrated after source stops + * (block-dirty-bitmap) + */ void (*state_pending_exact)(void *opaque, uint64_t *must_precopy, uint64_t *can_postcopy); - LoadStateHandler *load_state; + + /** + * @load_state + * + * Load sections generated by any of the save functions that + * generate sections. + * + * Legacy method. Should be deprecated when all users are ported + * to VMStateDescription. + * + * @f: QEMUFile where to receive the data + * @opaque: data pointer passed to register_savevm_live() + * @version_id: the maximum version_id supported + * + * Returns zero to indicate success and negative for error + */ + int (*load_state)(QEMUFile *f, void *opaque, int version_id); + + /** + * @load_setup + * + * Initializes the data structures on the destination. + * + * @f: QEMUFile where to receive the data + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*load_setup)(QEMUFile *f, void *opaque); + + /** + * @load_cleanup + * + * Uninitializes the data structures on the destination. 
+ * + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*load_cleanup)(void *opaque); - /* Called when postcopy migration wants to resume from failure */ + + /** + * @resume_prepare + * + * Called when postcopy migration wants to resume from failure + * + * @s: Current migration state + * @opaque: data pointer passed to register_savevm_live() + * + * Returns zero to indicate success and negative for error + */ int (*resume_prepare)(MigrationState *s, void *opaque); - /* Checks if switchover ack should be used. Called only in dest */ + + /** + * @switchover_ack_needed + * + * Checks if switchover ack should be used. Called only on + * destination. + * + * @opaque: data pointer passed to register_savevm_live() + * + * Returns true if switchover ack should be used and false + * otherwise + */ bool (*switchover_ack_needed)(void *opaque); } SaveVMHandlers; +/** + * register_savevm_live: Register a set of custom migration handlers + * + * @idstr: state section identifier + * @instance_id: instance id + * @version_id: version id supported + * @ops: SaveVMHandlers structure + * @opaque: data pointer passed to SaveVMHandlers handlers + */ int register_savevm_live(const char *idstr, uint32_t instance_id, int version_id, const SaveVMHandlers *ops, void *opaque); +/** + * unregister_savevm: Unregister custom migration handlers + * + * @obj: object associated with state section + * @idstr: state section identifier + * @opaque: data pointer passed to register_savevm_live() + */ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque); #endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index a028dba4d0..50c277cf0b 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -151,8 +151,6 @@ typedef struct IRQState *qemu_irq; /* * Function types */ -typedef void SaveStateHandler(QEMUFile *f, void *opaque); -typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); typedef void (*qemu_irq_handler)(void *opaque, int n, int level); #endif /* QEMU_TYPEDEFS_H */ diff --git a/io/channel-file.c b/io/channel-file.c index d4706fa592..a6ad7770c6 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -242,11 +242,6 @@ static int qio_channel_file_close(QIOChannel *ioc, { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); - if (qemu_fdatasync(fioc->fd) < 0) { - error_setg_errno(errp, errno, - "Unable to synchronize file data with storage device"); - return -1; - } if (qemu_close(fioc->fd) < 0) { error_setg_errno(errp, errno, "Unable to close file"); diff --git a/migration/colo.c b/migration/colo.c index 315e31fe32..84632a603e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -63,9 +63,9 @@ static bool colo_runstate_is_stopped(void) return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); } -static void colo_checkpoint_notify(void *opaque) +static void colo_checkpoint_notify(void) { - MigrationState *s = opaque; + MigrationState *s = migrate_get_current(); int64_t next_notify_time; qemu_event_set(&s->colo_checkpoint_event); @@ -74,10 +74,15 @@ static void colo_checkpoint_notify(void *opaque) timer_mod(s->colo_delay_timer, next_notify_time); } +static void colo_checkpoint_notify_timer(void *opaque) +{ + colo_checkpoint_notify(); +} + void colo_checkpoint_delay_set(void) { if (migration_in_colo_state()) { - colo_checkpoint_notify(migrate_get_current()); + colo_checkpoint_notify(); } } @@ -162,7 +167,7 @@ static void primary_vm_do_failover(void) * kick COLO thread which 
might wait at * qemu_sem_wait(&s->colo_checkpoint_sem). */ - colo_checkpoint_notify(s); + colo_checkpoint_notify(); /* * Wake up COLO thread which may blocked in recv() or send(), @@ -518,7 +523,7 @@ out: static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) { - colo_checkpoint_notify(data); + colo_checkpoint_notify(); } static void colo_process_checkpoint(MigrationState *s) @@ -642,7 +647,7 @@ void migrate_start_colo_process(MigrationState *s) bql_unlock(); qemu_event_init(&s->colo_checkpoint_event, false); s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, - colo_checkpoint_notify, s); + colo_checkpoint_notify_timer, NULL); qemu_sem_init(&s->colo_exit_sem, 0); colo_process_checkpoint(s); diff --git a/migration/file.c b/migration/file.c index 164b079966..b0b963e0ce 100644 --- a/migration/file.c +++ b/migration/file.c @@ -159,7 +159,7 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp) int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov, int niov, RAMBlock *block, Error **errp) { - ssize_t ret = -1; + ssize_t ret = 0; int i, slice_idx, slice_num; uintptr_t base, next, offset; size_t len; @@ -191,7 +191,7 @@ int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov, */ offset = (uintptr_t) iov[slice_idx].iov_base - (uintptr_t) block->host; if (offset >= block->used_length) { - error_setg(errp, "offset " RAM_ADDR_FMT + error_setg(errp, "offset %" PRIxPTR "outside of ramblock %s range", offset, block->idstr); ret = -1; break; diff --git a/migration/meson.build b/migration/meson.build index 92b1cc4297..1eeb915ff6 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -22,6 +22,7 @@ system_ss.add(files( 'migration.c', 'multifd.c', 'multifd-zlib.c', + 'multifd-zero-page.c', 'ram-compress.c', 'options.c', 'postcopy-ram.c', diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 99b49df5dd..7e96ae6ffd 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -344,6 +344,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %s\n", MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION), MultiFDCompression_str(params->multifd_compression)); + assert(params->has_zero_page_detection); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION), + qapi_enum_lookup(&ZeroPageDetection_lookup, + params->zero_page_detection)); monitor_printf(mon, "%s: %" PRIu64 " bytes\n", MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE), params->xbzrle_cache_size); @@ -634,6 +639,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_multifd_zstd_level = true; visit_type_uint8(v, param, &p->multifd_zstd_level, &err); break; + case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION: + p->has_zero_page_detection = true; + visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err); + break; case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: p->has_xbzrle_cache_size = true; if (!visit_type_size(v, param, &cache_size, &err)) { diff --git a/migration/migration.c b/migration/migration.c index a49fcd53ee..644e073b7d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1081,9 +1081,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) * Return true if we're already in the middle of a migration * (i.e. 
any of the active or setup states) */ -bool migration_is_setup_or_active(int state) +bool migration_is_setup_or_active(void) { - switch (state) { + MigrationState *s = current_migration; + + switch (s->state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: @@ -1101,9 +1103,11 @@ bool migration_is_setup_or_active(int state) } } -bool migration_is_running(int state) +bool migration_is_running(void) { - switch (state) { + MigrationState *s = current_migration; + + switch (s->state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: @@ -1404,7 +1408,7 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - assert(!migration_is_active(s)); + assert(!migration_is_active()); if (s->state == MIGRATION_STATUS_CANCELLING) { migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, @@ -1475,7 +1479,7 @@ static void migrate_fd_cancel(MigrationState *s) do { old_state = s->state; - if (!migration_is_running(old_state)) { + if (!migration_is_running()) { break; } /* If the migration is paused, kick it out of the pause */ @@ -1544,16 +1548,6 @@ int migration_call_notifiers(MigrationState *s, MigrationEventType type, return ret; } -bool migration_in_setup(MigrationState *s) -{ - return s->state == MIGRATION_STATUS_SETUP; -} - -bool migration_has_finished(MigrationState *s) -{ - return s->state == MIGRATION_STATUS_COMPLETED; -} - bool migration_has_failed(MigrationState *s) { return (s->state == MIGRATION_STATUS_CANCELLED || @@ -1601,10 +1595,8 @@ bool migration_incoming_postcopy_advised(void) bool migration_in_bg_snapshot(void) { - MigrationState *s = migrate_get_current(); - return migrate_background_snapshot() && - migration_is_setup_or_active(s->state); + migration_is_setup_or_active(); } bool migration_is_idle(void) @@ -1637,12 +1629,28 @@ bool migration_is_idle(void) return false; } -bool migration_is_active(MigrationState *s) +bool migration_is_active(void) { + MigrationState *s = current_migration; + return (s->state == MIGRATION_STATUS_ACTIVE || s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); } +bool migration_is_device(void) +{ + MigrationState *s = current_migration; + + return s->state == MIGRATION_STATUS_DEVICE; +} + +bool migration_thread_is_self(void) +{ + MigrationState *s = current_migration; + + return qemu_thread_is_self(&s->thread); +} + bool migrate_mode_is_cpr(MigrationState *s) { return s->parameters.mode == MIG_MODE_CPR_REBOOT; @@ -1960,7 +1968,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return true; } - if (migration_is_running(s->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } @@ -2297,7 +2305,7 @@ static void *source_return_path_thread(void *opaque) trace_source_return_path_thread_entry(); rcu_register_thread(); - while (migration_is_setup_or_active(ms->state)) { + while (migration_is_setup_or_active()) { trace_source_return_path_thread_loop_top(); header_type = qemu_get_be16(rp); @@ -3020,6 +3028,17 @@ static MigThrError postcopy_pause(MigrationState *s) } } +void migration_file_set_error(int err) +{ + MigrationState *s = current_migration; + + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->to_dst_file) { + qemu_file_set_error(s->to_dst_file, err); + } + } +} + static MigThrError migration_detect_error(MigrationState *s) { int ret; @@ -3461,7 +3480,7 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); - while 
(migration_is_active(s)) { + while (migration_is_active()) { if (urgent || !migration_rate_exceeded(s->to_dst_file)) { MigIterateState iter_state = migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { @@ -3607,7 +3626,7 @@ static void *bg_migration_thread(void *opaque) migration_bh_schedule(bg_migration_vm_start_bh, s); bql_unlock(); - while (migration_is_active(s)) { + while (migration_is_active()) { MigIterateState iter_state = bg_migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { continue; diff --git a/migration/migration.h b/migration/migration.h index 65c0b61cbd..8045e39c26 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -26,6 +26,7 @@ #include "qom/object.h" #include "postcopy-ram.h" #include "sysemu/runstate.h" +#include "migration/misc.h" struct PostcopyBlocktimeContext; @@ -479,8 +480,8 @@ bool migrate_has_error(MigrationState *s); void migrate_fd_connect(MigrationState *s, Error *error_in); -bool migration_is_setup_or_active(int state); -bool migration_is_running(int state); +int migration_call_notifiers(MigrationState *s, MigrationEventType type, + Error **errp); int migrate_init(MigrationState *s, Error **errp); bool migration_is_blocked(Error **errp); @@ -488,6 +489,8 @@ bool migration_is_blocked(Error **errp); bool migration_in_postcopy(void); bool migration_postcopy_is_alive(int state); MigrationState *migrate_get_current(void); +bool migration_has_failed(MigrationState *); +bool migrate_mode_is_cpr(MigrationState *); uint64_t ram_get_total_transferred_pages(void); diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c new file mode 100644 index 0000000000..1ba38be636 --- /dev/null +++ b/migration/multifd-zero-page.c @@ -0,0 +1,87 @@ +/* + * Multifd zero page detection implementation. + * + * Copyright (c) 2024 Bytedance Inc + * + * Authors: + * Hao Xiang <hao.xiang@bytedance.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "exec/ramblock.h" +#include "migration.h" +#include "multifd.h" +#include "options.h" +#include "ram.h" + +static bool multifd_zero_page_enabled(void) +{ + return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD; +} + +static void swap_page_offset(ram_addr_t *pages_offset, int a, int b) +{ + ram_addr_t temp; + + if (a == b) { + return; + } + + temp = pages_offset[a]; + pages_offset[a] = pages_offset[b]; + pages_offset[b] = temp; +} + +/** + * multifd_send_zero_page_detect: Perform zero page detection on all pages. + * + * Sorts normal pages before zero pages in p->pages->offset and updates + * p->pages->normal_num. + * + * @param p A pointer to the send params. + */ +void multifd_send_zero_page_detect(MultiFDSendParams *p) +{ + MultiFDPages_t *pages = p->pages; + RAMBlock *rb = pages->block; + int i = 0; + int j = pages->num - 1; + + if (!multifd_zero_page_enabled()) { + pages->normal_num = pages->num; + return; + } + + /* + * Sort the page offset array by moving all normal pages to + * the left and all zero pages to the right of the array. 
+ */ + while (i <= j) { + uint64_t offset = pages->offset[i]; + + if (!buffer_is_zero(rb->host + offset, p->page_size)) { + i++; + continue; + } + + swap_page_offset(pages->offset, i, j); + ram_release_page(rb->idstr, offset); + j--; + } + + pages->normal_num = i; +} + +void multifd_recv_zero_page_process(MultiFDRecvParams *p) +{ + for (int i = 0; i < p->zero_num; i++) { + void *page = p->host + p->zero[i]; + if (!buffer_is_zero(page, p->page_size)) { + memset(page, 0, p->page_size); + } + } +} diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index 6120faad65..83c0374380 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) int ret; uint32_t i; - multifd_send_prepare_header(p); + if (!multifd_send_prepare_common(p)) { + goto out; + } - for (i = 0; i < pages->num; i++) { + for (i = 0; i < pages->normal_num; i++) { uint32_t available = z->zbuff_len - out_size; int flush = Z_NO_FLUSH; - if (i == pages->num - 1) { + if (i == pages->normal_num - 1) { flush = Z_SYNC_FLUSH; } @@ -172,10 +174,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) p->iov[p->iovs_num].iov_len = out_size; p->iovs_num++; p->next_packet_size = out_size; - p->flags |= MULTIFD_FLAG_ZLIB; +out: + p->flags |= MULTIFD_FLAG_ZLIB; multifd_send_fill_packet(p); - return 0; } @@ -261,6 +263,14 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) p->id, flags, MULTIFD_FLAG_ZLIB); return -1; } + + multifd_recv_zero_page_process(p); + + if (!p->normal_num) { + assert(in_size == 0); + return 0; + } + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); if (ret != 0) { @@ -310,6 +320,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) p->id, out_size, expected_size); return -1; } + return 0; } diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index cac236833d..02112255ad 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -118,16 +118,18 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) int ret; uint32_t i; - multifd_send_prepare_header(p); + if (!multifd_send_prepare_common(p)) { + goto out; + } z->out.dst = z->zbuff; z->out.size = z->zbuff_len; z->out.pos = 0; - for (i = 0; i < pages->num; i++) { + for (i = 0; i < pages->normal_num; i++) { ZSTD_EndDirective flush = ZSTD_e_continue; - if (i == pages->num - 1) { + if (i == pages->normal_num - 1) { flush = ZSTD_e_flush; } z->in.src = p->pages->block->host + pages->offset[i]; @@ -161,10 +163,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) p->iov[p->iovs_num].iov_len = z->out.pos; p->iovs_num++; p->next_packet_size = z->out.pos; - p->flags |= MULTIFD_FLAG_ZSTD; +out: + p->flags |= MULTIFD_FLAG_ZSTD; multifd_send_fill_packet(p); - return 0; } @@ -257,6 +259,14 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) p->id, flags, MULTIFD_FLAG_ZSTD); return -1; } + + multifd_recv_zero_page_process(p); + + if (!p->normal_num) { + assert(in_size == 0); + return 0; + } + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); if (ret != 0) { diff --git a/migration/multifd.c b/migration/multifd.c index d4a44da559..0179422f6d 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/rcu.h" #include "exec/target_page.h" #include "sysemu/sysemu.h" @@ -111,11 +112,16 @@ void multifd_send_channel_created(void) static void 
multifd_set_file_bitmap(MultiFDSendParams *p) { MultiFDPages_t *pages = p->pages; + uint32_t zero_num = p->pages->num - p->pages->normal_num; assert(pages->block); - for (int i = 0; i < p->pages->num; i++) { - ramblock_set_file_bmap_atomic(pages->block, pages->offset[i]); + for (int i = 0; i < p->pages->normal_num; i++) { + ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true); + } + + for (int i = p->pages->num; i < zero_num; i++) { + ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false); } } @@ -153,13 +159,13 @@ static void multifd_send_prepare_iovs(MultiFDSendParams *p) { MultiFDPages_t *pages = p->pages; - for (int i = 0; i < pages->num; i++) { + for (int i = 0; i < pages->normal_num; i++) { p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; p->iov[p->iovs_num].iov_len = p->page_size; p->iovs_num++; } - p->next_packet_size = pages->num * p->page_size; + p->next_packet_size = pages->normal_num * p->page_size; } /** @@ -178,6 +184,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) bool use_zero_copy_send = migrate_zero_copy_send(); int ret; + multifd_send_zero_page_detect(p); + if (!multifd_use_packets()) { multifd_send_prepare_iovs(p); multifd_set_file_bitmap(p); @@ -261,6 +269,13 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) p->id, flags, MULTIFD_FLAG_NOCOMP); return -1; } + + multifd_recv_zero_page_process(p); + + if (!p->normal_num) { + return 0; + } + for (int i = 0; i < p->normal_num; i++) { p->iov[i].iov_base = p->host + p->normal[i]; p->iov[i].iov_len = p->page_size; @@ -295,6 +310,7 @@ static void multifd_pages_reset(MultiFDPages_t *pages) * overwritten later when reused. */ pages->num = 0; + pages->normal_num = 0; pages->block = NULL; } @@ -386,11 +402,13 @@ void multifd_send_fill_packet(MultiFDSendParams *p) MultiFDPacket_t *packet = p->packet; MultiFDPages_t *pages = p->pages; uint64_t packet_num; + uint32_t zero_num = pages->num - pages->normal_num; int i; packet->flags = cpu_to_be32(p->flags); packet->pages_alloc = cpu_to_be32(p->pages->allocated); - packet->normal_pages = cpu_to_be32(pages->num); + packet->normal_pages = cpu_to_be32(pages->normal_num); + packet->zero_pages = cpu_to_be32(zero_num); packet->next_packet_size = cpu_to_be32(p->next_packet_size); packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); @@ -408,10 +426,11 @@ void multifd_send_fill_packet(MultiFDSendParams *p) } p->packets_sent++; - p->total_normal_pages += pages->num; + p->total_normal_pages += pages->normal_num; + p->total_zero_pages += zero_num; - trace_multifd_send(p->id, packet_num, pages->num, p->flags, - p->next_packet_size); + trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num, + p->flags, p->next_packet_size); } static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) @@ -452,20 +471,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) p->normal_num = be32_to_cpu(packet->normal_pages); if (p->normal_num > packet->pages_alloc) { error_setg(errp, "multifd: received packet " - "with %u pages and expected maximum pages are %u", + "with %u normal pages and expected maximum pages are %u", p->normal_num, packet->pages_alloc) ; return -1; } + p->zero_num = be32_to_cpu(packet->zero_pages); + if (p->zero_num > packet->pages_alloc - p->normal_num) { + error_setg(errp, "multifd: received packet " + "with %u zero pages and expected maximum zero pages are %u", + p->zero_num, packet->pages_alloc - p->normal_num) ; + return -1; + } + p->next_packet_size 
= be32_to_cpu(packet->next_packet_size); p->packet_num = be64_to_cpu(packet->packet_num); p->packets_recved++; p->total_normal_pages += p->normal_num; + p->total_zero_pages += p->zero_num; - trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, - p->next_packet_size); + trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num, + p->flags, p->next_packet_size); - if (p->normal_num == 0) { + if (p->normal_num == 0 && p->zero_num == 0) { return 0; } @@ -491,6 +519,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) p->normal[i] = offset; } + for (i = 0; i < p->zero_num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); + + if (offset > (p->block->used_length - p->page_size)) { + error_setg(errp, "multifd: offset too long %" PRIu64 + " (max " RAM_ADDR_FMT ")", + offset, p->block->used_length); + return -1; + } + p->zero[i] = offset; + } + return 0; } @@ -710,16 +750,26 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) if (p->c) { migration_ioc_unregister_yank(p->c); /* - * An explicit close() on the channel here is normally not - * required, but can be helpful for "file:" iochannels, where it - * will include fdatasync() to make sure the data is flushed to the - * disk backend. + * The object_unref() cannot guarantee the fd will always be + * released because finalize() of the iochannel is only + * triggered on the last reference and it's not guaranteed + * that we always hold the last refcount when reaching here. + * + * Closing the fd explicitly has the benefit that if there is any + * registered I/O handler callbacks on such fd, that will get a + * POLLNVAL event and will further trigger the cleanup to finally + * release the IOC. * - * The object_unref() cannot guarantee that because: (1) finalize() - * of the iochannel is only triggered on the last reference, and - * it's not guaranteed that we always hold the last refcount when - * reaching here, and, (2) even if finalize() is invoked, it only - * does a close(fd) without data flush. + * FIXME: It should logically be guaranteed that all multifd + * channels have no I/O handler callback registered when reaching + * here, because migration thread will wait for all multifd channel + * establishments to complete during setup. Since + * migrate_fd_cleanup() will be scheduled in main thread too, all + * previous callbacks should guarantee to be completed when + * reaching here. See multifd_send_state.channels_created and its + * usage. In the future, we could replace this with an assert + * making sure we're the last reference, or simply drop it if above + * is more clear to be justified. 
*/ qio_channel_close(p->c, &error_abort); object_unref(OBJECT(p->c)); @@ -908,6 +958,8 @@ static void *multifd_send_thread(void *opaque) stat64_add(&mig_stats.multifd_bytes, p->next_packet_size + p->packet_len); + stat64_add(&mig_stats.normal_pages, pages->normal_num); + stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num); multifd_pages_reset(p->pages); p->next_packet_size = 0; @@ -955,7 +1007,8 @@ out: rcu_unregister_thread(); migration_threads_remove(thread); - trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); + trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages, + p->total_zero_pages); return NULL; } @@ -1306,6 +1359,8 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) p->iov = NULL; g_free(p->normal); p->normal = NULL; + g_free(p->zero); + p->zero = NULL; multifd_recv_state->ops->recv_cleanup(p); } @@ -1439,7 +1494,7 @@ static void *multifd_recv_thread(void *opaque) flags = p->flags; /* recv methods don't know how to handle the SYNC flag */ p->flags &= ~MULTIFD_FLAG_SYNC; - has_data = !!p->normal_num; + has_data = p->normal_num || p->zero_num; qemu_mutex_unlock(&p->mutex); } else { /* @@ -1497,7 +1552,9 @@ static void *multifd_recv_thread(void *opaque) } rcu_unregister_thread(); - trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); + trace_multifd_recv_thread_end(p->id, p->packets_recved, + p->total_normal_pages, + p->total_zero_pages); return NULL; } @@ -1549,6 +1606,7 @@ int multifd_recv_setup(Error **errp) p->name = g_strdup_printf("multifdrecv_%d", i); p->iov = g_new0(struct iovec, page_count); p->normal = g_new0(ram_addr_t, page_count); + p->zero = g_new0(ram_addr_t, page_count); p->page_count = page_count; p->page_size = qemu_target_page_size(); } @@ -1623,3 +1681,17 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) QEMU_THREAD_JOINABLE); qatomic_inc(&multifd_recv_state->count); } + +bool multifd_send_prepare_common(MultiFDSendParams *p) +{ + multifd_send_zero_page_detect(p); + + if (!p->pages->normal_num) { + p->next_packet_size = 0; + return false; + } + + multifd_send_prepare_header(p); + + return true; +} diff --git a/migration/multifd.h b/migration/multifd.h index 7447c2bea3..c9d9b09239 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -55,14 +55,24 @@ typedef struct { /* size of the next packet that contains pages */ uint32_t next_packet_size; uint64_t packet_num; - uint64_t unused[4]; /* Reserved for future use */ + /* zero pages */ + uint32_t zero_pages; + uint32_t unused32[1]; /* Reserved for future use */ + uint64_t unused64[3]; /* Reserved for future use */ char ramblock[256]; + /* + * This array contains the pointers to: + * - normal pages (initial normal_pages entries) + * - zero pages (following zero_pages entries) + */ uint64_t offset[]; } __attribute__((packed)) MultiFDPacket_t; typedef struct { /* number of used pages */ uint32_t num; + /* number of normal pages */ + uint32_t normal_num; /* number of allocated pages */ uint32_t allocated; /* offset of each page */ @@ -136,6 +146,8 @@ typedef struct { uint64_t packets_sent; /* non zero pages sent through this channel */ uint64_t total_normal_pages; + /* zero pages sent through this channel */ + uint64_t total_zero_pages; /* buffers to send */ struct iovec *iov; /* number of iovs used */ @@ -194,12 +206,18 @@ typedef struct { uint8_t *host; /* non zero pages recv through this channel */ uint64_t total_normal_pages; + /* zero pages recv through this channel */ + uint64_t 
total_zero_pages; /* buffers to recv */ struct iovec *iov; /* Pages that are not zero */ ram_addr_t *normal; /* num of non zero pages */ uint32_t normal_num; + /* Pages that are zero */ + ram_addr_t *zero; + /* num of zero pages */ + uint32_t zero_num; /* used for de-compression methods */ void *compress_data; } MultiFDRecvParams; @@ -221,6 +239,9 @@ typedef struct { void multifd_register_ops(int method, MultiFDMethods *ops); void multifd_send_fill_packet(MultiFDSendParams *p); +bool multifd_send_prepare_common(MultiFDSendParams *p); +void multifd_send_zero_page_detect(MultiFDSendParams *p); +void multifd_recv_zero_page_process(MultiFDRecvParams *p); static inline void multifd_send_prepare_header(MultiFDSendParams *p) { diff --git a/migration/options.c b/migration/options.c index 40eb930940..9ed2fe4bee 100644 --- a/migration/options.c +++ b/migration/options.c @@ -179,6 +179,9 @@ Property migration_properties[] = { DEFINE_PROP_MIG_MODE("mode", MigrationState, parameters.mode, MIG_MODE_NORMAL), + DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState, + parameters.zero_page_detection, + ZERO_PAGE_DETECTION_MULTIFD), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -681,7 +684,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp) MigrationState *s = migrate_get_current(); bool new_caps[MIGRATION_CAPABILITY__MAX]; - if (migration_is_running(s->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } @@ -725,7 +728,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, MigrationCapabilityStatusList *cap; bool new_caps[MIGRATION_CAPABILITY__MAX]; - if (migration_is_running(s->state) || migration_in_colo_state()) { + if (migration_is_running() || migration_in_colo_state()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } @@ -924,6 +927,13 @@ const char *migrate_tls_hostname(void) return s->parameters.tls_hostname; } +uint64_t migrate_vcpu_dirty_limit_period(void) +{ + MigrationState *s = migrate_get_current(); + + return s->parameters.x_vcpu_dirty_limit_period; +} + uint64_t migrate_xbzrle_cache_size(void) { MigrationState *s = migrate_get_current(); @@ -931,6 +941,13 @@ uint64_t migrate_xbzrle_cache_size(void) return s->parameters.xbzrle_cache_size; } +ZeroPageDetection migrate_zero_page_detection(void) +{ + MigrationState *s = migrate_get_current(); + + return s->parameters.zero_page_detection; +} + /* parameter setters */ void migrate_set_block_incremental(bool value) @@ -1041,6 +1058,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; params->has_mode = true; params->mode = s->parameters.mode; + params->has_zero_page_detection = true; + params->zero_page_detection = s->parameters.zero_page_detection; return params; } @@ -1077,6 +1096,7 @@ void migrate_params_init(MigrationParameters *params) params->has_x_vcpu_dirty_limit_period = true; params->has_vcpu_dirty_limit = true; params->has_mode = true; + params->has_zero_page_detection = true; } /* @@ -1391,6 +1411,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_mode) { dest->mode = params->mode; } + + if (params->has_zero_page_detection) { + dest->zero_page_detection = params->zero_page_detection; + } } static void migrate_params_apply(MigrateSetParameters *params, Error **errp) @@ -1541,6 +1565,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if 
(params->has_mode) { s->parameters.mode = params->mode; } + + if (params->has_zero_page_detection) { + s->parameters.zero_page_detection = params->zero_page_detection; + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) diff --git a/migration/options.h b/migration/options.h index 6ddd8dad9b..ab8199e207 100644 --- a/migration/options.h +++ b/migration/options.h @@ -16,6 +16,7 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/client-options.h" /* migration properties */ @@ -24,12 +25,10 @@ extern Property migration_properties[]; /* capabilities */ bool migrate_auto_converge(void); -bool migrate_background_snapshot(void); bool migrate_block(void); bool migrate_colo(void); bool migrate_compress(void); bool migrate_dirty_bitmaps(void); -bool migrate_dirty_limit(void); bool migrate_events(void); bool migrate_mapped_ram(void); bool migrate_ignore_shared(void); @@ -38,11 +37,9 @@ bool migrate_multifd(void); bool migrate_pause_before_switchover(void); bool migrate_postcopy_blocktime(void); bool migrate_postcopy_preempt(void); -bool migrate_postcopy_ram(void); bool migrate_rdma_pin_all(void); bool migrate_release_ram(void); bool migrate_return_path(void); -bool migrate_switchover_ack(void); bool migrate_validate_uuid(void); bool migrate_xbzrle(void); bool migrate_zero_blocks(void); @@ -84,7 +81,6 @@ uint8_t migrate_max_cpu_throttle(void); uint64_t migrate_max_bandwidth(void); uint64_t migrate_avail_switchover_bandwidth(void); uint64_t migrate_max_postcopy_bandwidth(void); -MigMode migrate_mode(void); int migrate_multifd_channels(void); MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); @@ -94,6 +90,7 @@ const char *migrate_tls_authz(void); const char *migrate_tls_creds(void); const char *migrate_tls_hostname(void); uint64_t migrate_xbzrle_cache_size(void); +ZeroPageDetection migrate_zero_page_detection(void); /* parameters setters */ diff --git a/migration/qemu-file.c b/migration/qemu-file.c index b10c882629..a10882d47f 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -63,6 +63,8 @@ struct QEMUFile { */ int qemu_file_shutdown(QEMUFile *f) { + Error *err = NULL; + /* * We must set qemufile error before the real shutdown(), otherwise * there can be a race window where we thought IO all went though @@ -91,7 +93,8 @@ int qemu_file_shutdown(QEMUFile *f) return -ENOSYS; } - if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) { + if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, &err) < 0) { + error_report_err(err); return -EIO; } diff --git a/migration/ram.c b/migration/ram.c index 003c28e133..8deb84984f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1140,6 +1140,10 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, QEMUFile *file = pss->pss_channel; int len = 0; + if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) { + return 0; + } + if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { return 0; } @@ -1284,7 +1288,6 @@ static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) if (!multifd_queue_page(block, offset)) { return -1; } - stat64_add(&mig_stats.normal_pages, 1); return 1; } @@ -2076,7 +2079,6 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, */ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) { - RAMBlock *block = pss->block; ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; @@ -2092,17 +2094,33 @@ static int 
ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
         return 1;
     }
 
+    return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+    RAMBlock *block = pss->block;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
+
     /*
-     * Do not use multifd in postcopy as one whole host page should be
-     * placed. Meanwhile postcopy requires atomic update of pages, so even
-     * if host page size == guest page size the dest guest during run may
-     * still see partially copied pages which is data corruption.
+     * While using multifd live migration, we still need to handle zero
+     * page checking on the migration main thread.
      */
-    if (migrate_multifd() && !migration_in_postcopy()) {
-        return ram_save_multifd_page(block, offset);
+    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+        if (save_zero_page(rs, pss, offset)) {
+            return 1;
+        }
     }
 
-    return ram_save_page(rs, pss);
+    return ram_save_multifd_page(block, offset);
 }
 
 /* Should be called before sending a host page */
@@ -2909,10 +2927,9 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
     RAMBlock *block;
     ram_addr_t offset;
     size_t used_len, start, npages;
-    MigrationState *s = migrate_get_current();
 
     /* This function is currently expected to be used during live migration */
-    if (!migration_is_setup_or_active(s->state)) {
+    if (!migration_is_setup_or_active()) {
         return;
     }
 
@@ -3110,7 +3127,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     }
 
     migration_ops = g_malloc0(sizeof(MigrationOps));
-    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+
+    if (migrate_multifd()) {
+        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+    } else {
+        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+    }
     bql_unlock();
 
     ret = multifd_send_sync_main();
@@ -3150,9 +3172,13 @@ static void ram_save_file_bmap(QEMUFile *f)
     }
 }
 
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset)
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
 {
-    set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+    if (set) {
+        set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+    } else {
+        clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+    }
 }
 
 /**
@@ -3263,7 +3289,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 
 out:
     if (ret >= 0
-        && migration_is_setup_or_active(migrate_get_current()->state)) {
+        && migration_is_setup_or_active()) {
         if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
             !migrate_mapped_ram()) {
             ret = multifd_send_sync_main();
@@ -4214,6 +4240,12 @@ static int ram_load_precopy(QEMUFile *f)
         i++;
 
         addr = qemu_get_be64(f);
+        ret = qemu_file_get_error(f);
+        if (ret) {
+            error_report("Getting RAM address failed");
+            break;
+        }
+
         flags = addr & ~TARGET_PAGE_MASK;
         addr &= TARGET_PAGE_MASK;
 
diff --git a/migration/ram.h b/migration/ram.h
index b9ac0da587..08feecaf51 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -75,7 +75,8 @@ bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp);
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start);
 void postcopy_preempt_shutdown_file(MigrationState *s);
 void *postcopy_preempt_thread(void *opaque);
-void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset);
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset,
+                                   bool set);
 
 /* ram cache */
 int colo_init_ram_cache(void);
diff --git a/migration/rdma.c b/migration/rdma.c
index a355dcea89..855753c671 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3357,7 +3357,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
         goto err_rdma_dest_wait;
     }
 
-    isock->host = rdma->host;
+    isock->host = g_strdup(rdma->host);
     isock->port = g_strdup_printf("%d", rdma->port);
 
     /*
diff --git a/migration/savevm.c b/migration/savevm.c
index dc1fb9c0d3..388d7af7cd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1317,7 +1317,7 @@ void qemu_savevm_state_setup(QEMUFile *f)
     MigrationState *ms = migrate_get_current();
     SaveStateEntry *se;
     Error *local_err = NULL;
-    int ret;
+    int ret = 0;
 
     json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
     json_writer_start_array(ms->vmdesc, "devices");
@@ -1351,6 +1351,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
         }
     }
 
+    if (ret) {
+        return;
+    }
+
     if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
         error_report_err(local_err);
     }
@@ -1390,7 +1394,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s)
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 {
     SaveStateEntry *se;
-    int ret = 1;
+    bool all_finished = true;
+    int ret;
 
     trace_savevm_state_iterate();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1431,16 +1436,12 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
                              "%d(%s): %d",
                              se->section_id, se->idstr, ret);
                 qemu_file_set_error(f, ret);
-        }
-        if (ret <= 0) {
-            /* Do not proceed to the next vmstate before this one reported
-               completion of the current stage. This serializes the migration
-               and reduces the probability that a faster changing state is
-               synchronized over and over again. */
-            break;
+            return ret;
+        } else if (!ret) {
+            all_finished = false;
         }
     }
-    return ret;
+    return all_finished;
 }
 
 static bool should_send_vmdesc(void)
 {
@@ -1705,7 +1706,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     MigrationState *ms = migrate_get_current();
     MigrationStatus status;
 
-    if (migration_is_running(ms->state)) {
+    if (migration_is_running()) {
         error_setg(errp, QERR_MIGRATION_ACTIVE);
         return -EINVAL;
     }
diff --git a/migration/trace-events b/migration/trace-events
index bf1a069632..f0e1cb80c7 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -128,21 +128,21 @@ postcopy_preempt_reset_channel(void) ""
 # multifd.c
 multifd_new_send_channel_async(uint8_t id) "channel %u"
 multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p"
-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u"
+multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t zero, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
 multifd_recv_new_channel(uint8_t id) "channel %u"
 multifd_recv_sync_main(long packet_num) "packet num %ld"
 multifd_recv_sync_main_signal(uint8_t id) "channel %u"
 multifd_recv_sync_main_wait(uint8_t id) "iter %u"
 multifd_recv_terminate_threads(bool error) "error %d"
-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64
+multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64
 multifd_recv_thread_start(uint8_t id) "%u"
-multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u"
+multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u"
 multifd_send_error(uint8_t id) "channel %u"
 multifd_send_sync_main(long packet_num) "packet num %ld"
 multifd_send_sync_main_signal(uint8_t id) "channel %u"
 multifd_send_sync_main_wait(uint8_t id) "channel %u"
 multifd_send_terminate_threads(void) ""
-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64
+multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64
 multifd_send_thread_start(uint8_t id) "%u"
 multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
 multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"
diff --git a/net/colo-compare.c b/net/colo-compare.c
index f2dfc0ebdc..c4ad0ab71f 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -28,7 +28,6 @@
 #include "sysemu/iothread.h"
 #include "net/colo-compare.h"
 #include "migration/colo.h"
-#include "migration/migration.h"
 #include "util.h"
 
 #include "block/aio-wait.h"
@@ -189,7 +188,7 @@ static void colo_compare_inconsistency_notify(CompareState *s)
         notify_remote_frame(s);
     } else {
         notifier_list_notify(&colo_compare_notifiers,
-                             migrate_get_current());
+                             NULL);
     }
 }
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index e6bdb4562d..8564817073 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,7 +26,6 @@
 #include <err.h>
 #include "standard-headers/linux/virtio_net.h"
 #include "monitor/monitor.h"
-#include "migration/migration.h"
 #include "migration/misc.h"
 #include "hw/virtio/vhost.h"
 
@@ -355,7 +354,7 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
     if (s->always_svq ||
-        migration_is_setup_or_active(migrate_get_current()->state)) {
+        migration_is_setup_or_active()) {
         v->shadow_vqs_enabled = true;
     } else {
         v->shadow_vqs_enabled = false;
diff --git a/qapi/migration.json b/qapi/migration.json
index 51d188b902..aa1b39bce1 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -671,6 +671,23 @@
   'data': [ 'normal', 'cpr-reboot' ] }
 
 ##
+# @ZeroPageDetection:
+#
+# @none: Do not perform zero page checking.
+#
+# @legacy: Perform zero page checking in main migration thread.
+#
+# @multifd: Perform zero page checking in multifd sender thread if
+#     multifd migration is enabled, else in the main migration
+#     thread as for @legacy.
+#
+# Since: 9.0
+#
+##
+{ 'enum': 'ZeroPageDetection',
+  'data': [ 'none', 'legacy', 'multifd' ] }
+
+##
 # @BitmapMigrationBitmapAliasTransform:
 #
 # @persistent: If present, the bitmap will be made persistent or
@@ -891,6 +908,10 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #     (Since 8.2)
 #
+# @zero-page-detection: Whether and how to detect zero pages.
+#     See description in @ZeroPageDetection. Default is 'multifd'.
+#     (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated. Use
@@ -924,7 +945,8 @@
            'block-bitmap-mapping',
            { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
            'vcpu-dirty-limit',
-           'mode'] }
+           'mode',
+           'zero-page-detection'] }
 
 ##
 # @MigrateSetParameters:
@@ -1083,6 +1105,10 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #     (Since 8.2)
 #
+# @zero-page-detection: Whether and how to detect zero pages.
+#     See description in @ZeroPageDetection. Default is 'multifd'.
+#     (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated. Use
@@ -1136,7 +1162,8 @@
             '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                             'features': [ 'unstable' ] },
             '*vcpu-dirty-limit': 'uint64',
-            '*mode': 'MigMode'} }
+            '*mode': 'MigMode',
+            '*zero-page-detection': 'ZeroPageDetection'} }
 
 ##
 # @migrate-set-parameters:
@@ -1311,6 +1338,10 @@
 # @mode: Migration mode. See description in @MigMode. Default is 'normal'.
 #     (Since 8.2)
 #
+# @zero-page-detection: Whether and how to detect zero pages.
+#     See description in @ZeroPageDetection. Default is 'multifd'.
+#     (since 9.0)
+#
 # Features:
 #
 # @deprecated: Member @block-incremental is deprecated. Use
@@ -1361,7 +1392,8 @@
             '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
                                             'features': [ 'unstable' ] },
             '*vcpu-dirty-limit': 'uint64',
-            '*mode': 'MigMode'} }
+            '*mode': 'MigMode',
+            '*zero-page-detection': 'ZeroPageDetection'} }
 
 ##
 # @query-migrate-parameters:
diff --git a/stubs/colo.c b/stubs/colo.c
index 08c9f982d5..f8c069b739 100644
--- a/stubs/colo.c
+++ b/stubs/colo.c
@@ -2,7 +2,6 @@
 #include "qemu/notify.h"
 #include "net/colo-compare.h"
 #include "migration/colo.h"
-#include "migration/migration.h"
 #include "qemu/error-report.h"
 #include "qapi/qapi-commands-migration.h"
 
diff --git a/system/dirtylimit.c b/system/dirtylimit.c
index b5607eb8c2..ab20da34bb 100644
--- a/system/dirtylimit.c
+++ b/system/dirtylimit.c
@@ -25,8 +25,6 @@
 #include "sysemu/kvm.h"
 #include "trace.h"
 #include "migration/misc.h"
-#include "migration/migration.h"
-#include "migration/options.h"
 
 /*
  * Dirtylimit stop working if dirty page rate error
@@ -78,14 +76,13 @@ static bool dirtylimit_quit;
 
 static void vcpu_dirty_rate_stat_collect(void)
 {
-    MigrationState *s = migrate_get_current();
     VcpuStat stat;
     int i = 0;
     int64_t period = DIRTYLIMIT_CALC_TIME_MS;
 
     if (migrate_dirty_limit() &&
-        migration_is_active(s)) {
-        period = s->parameters.x_vcpu_dirty_limit_period;
+        migration_is_active()) {
+        period = migrate_vcpu_dirty_limit_period();
     }
 
     /* calculate vcpu dirtyrate */
@@ -450,10 +447,8 @@ static void dirtylimit_cleanup(void)
  */
 static bool dirtylimit_is_allowed(void)
 {
-    MigrationState *ms = migrate_get_current();
-
-    if (migration_is_running(ms->state) &&
-        (!qemu_thread_is_self(&ms->thread)) &&
+    if (migration_is_running() &&
+        !migration_thread_is_self() &&
         migrate_dirty_limit() &&
         dirtylimit_in_service()) {
         return false;
diff --git a/system/physmem.c b/system/physmem.c
index 6e9ed97597..6cfb7a80ab 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2681,53 +2681,69 @@ static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
     return false;
 }
 
+static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
+                                                const uint8_t *buf,
+                                                hwaddr len, hwaddr mr_addr,
+                                                hwaddr *l, MemoryRegion *mr)
+{
+    if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    if (!memory_access_is_direct(mr, true)) {
+        uint64_t val;
+        MemTxResult result;
+        bool release_lock = prepare_mmio_access(mr);
+
+        *l = memory_access_size(mr, *l, mr_addr);
+        /*
+         * XXX: could force current_cpu to NULL to avoid
+         * potential bugs
+         */
+
+        /*
+         * Assure Coverity (and ourselves) that we are not going to OVERRUN
+         * the buffer by following ldn_he_p().
+         */
+#ifdef QEMU_STATIC_ANALYSIS
+        assert((*l == 1 && len >= 1) ||
+               (*l == 2 && len >= 2) ||
+               (*l == 4 && len >= 4) ||
+               (*l == 8 && len >= 8));
+#endif
+        val = ldn_he_p(buf, *l);
+        result = memory_region_dispatch_write(mr, mr_addr, val,
+                                              size_memop(*l), attrs);
+        if (release_lock) {
+            bql_unlock();
+        }
+
+        return result;
+    } else {
+        /* RAM case */
+        uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
+                                               false);
+
+        memmove(ram_ptr, buf, *l);
+        invalidate_and_set_dirty(mr, mr_addr, *l);
+
+        return MEMTX_OK;
+    }
+}
+
 /* Called within RCU critical section. */
 static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
                                            MemTxAttrs attrs,
                                            const void *ptr,
-                                           hwaddr len, hwaddr addr1,
+                                           hwaddr len, hwaddr mr_addr,
                                            hwaddr l, MemoryRegion *mr)
 {
-    uint8_t *ram_ptr;
-    uint64_t val;
     MemTxResult result = MEMTX_OK;
-    bool release_lock = false;
     const uint8_t *buf = ptr;
 
     for (;;) {
-        if (!flatview_access_allowed(mr, attrs, addr1, l)) {
-            result |= MEMTX_ACCESS_ERROR;
-            /* Keep going. */
-        } else if (!memory_access_is_direct(mr, true)) {
-            release_lock |= prepare_mmio_access(mr);
-            l = memory_access_size(mr, l, addr1);
-            /* XXX: could force current_cpu to NULL to avoid
-               potential bugs */
-
-            /*
-             * Assure Coverity (and ourselves) that we are not going to OVERRUN
-             * the buffer by following ldn_he_p().
-             */
-#ifdef QEMU_STATIC_ANALYSIS
-            assert((l == 1 && len >= 1) ||
-                   (l == 2 && len >= 2) ||
-                   (l == 4 && len >= 4) ||
-                   (l == 8 && len >= 8));
-#endif
-            val = ldn_he_p(buf, l);
-            result |= memory_region_dispatch_write(mr, addr1, val,
-                                                   size_memop(l), attrs);
-        } else {
-            /* RAM case */
-            ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
-            memmove(ram_ptr, buf, l);
-            invalidate_and_set_dirty(mr, addr1, l);
-        }
-
-        if (release_lock) {
-            bql_unlock();
-            release_lock = false;
-        }
+        result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
+                                               mr);
 
         len -= l;
         buf += l;
@@ -2738,7 +2754,7 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
         }
 
         l = len;
-        mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
+        mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
     }
 
     return result;
@@ -2749,63 +2765,76 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
                                   const void *buf, hwaddr len)
 {
     hwaddr l;
-    hwaddr addr1;
+    hwaddr mr_addr;
     MemoryRegion *mr;
 
     l = len;
-    mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
+    mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
     if (!flatview_access_allowed(mr, attrs, addr, len)) {
         return MEMTX_ACCESS_ERROR;
     }
     return flatview_write_continue(fv, addr, attrs, buf, len,
-                                   addr1, l, mr);
+                                   mr_addr, l, mr);
+}
+
+static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
+                                               hwaddr len, hwaddr mr_addr,
+                                               hwaddr *l,
+                                               MemoryRegion *mr)
+{
+    if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    if (!memory_access_is_direct(mr, false)) {
+        /* I/O case */
+        uint64_t val;
+        MemTxResult result;
+        bool release_lock = prepare_mmio_access(mr);
+
+        *l = memory_access_size(mr, *l, mr_addr);
+        result = memory_region_dispatch_read(mr, mr_addr, &val, size_memop(*l),
+                                             attrs);
+
+        /*
+         * Assure Coverity (and ourselves) that we are not going to OVERRUN
+         * the buffer by following stn_he_p().
+         */
+#ifdef QEMU_STATIC_ANALYSIS
+        assert((*l == 1 && len >= 1) ||
+               (*l == 2 && len >= 2) ||
+               (*l == 4 && len >= 4) ||
+               (*l == 8 && len >= 8));
+#endif
+        stn_he_p(buf, *l, val);
+
+        if (release_lock) {
+            bql_unlock();
+        }
+        return result;
+    } else {
+        /* RAM case */
+        uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
+                                               false);
+
+        memcpy(buf, ram_ptr, *l);
+
+        return MEMTX_OK;
+    }
 }
 
 /* Called within RCU critical section. */
 MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
                                    MemTxAttrs attrs, void *ptr,
-                                   hwaddr len, hwaddr addr1, hwaddr l,
+                                   hwaddr len, hwaddr mr_addr, hwaddr l,
                                    MemoryRegion *mr)
 {
-    uint8_t *ram_ptr;
-    uint64_t val;
     MemTxResult result = MEMTX_OK;
-    bool release_lock = false;
     uint8_t *buf = ptr;
 
     fuzz_dma_read_cb(addr, len, mr);
     for (;;) {
-        if (!flatview_access_allowed(mr, attrs, addr1, l)) {
-            result |= MEMTX_ACCESS_ERROR;
-            /* Keep going. */
-        } else if (!memory_access_is_direct(mr, false)) {
-            /* I/O case */
-            release_lock |= prepare_mmio_access(mr);
-            l = memory_access_size(mr, l, addr1);
-            result |= memory_region_dispatch_read(mr, addr1, &val,
-                                                  size_memop(l), attrs);
-
-            /*
-             * Assure Coverity (and ourselves) that we are not going to OVERRUN
-             * the buffer by following stn_he_p().
-             */
-#ifdef QEMU_STATIC_ANALYSIS
-            assert((l == 1 && len >= 1) ||
-                   (l == 2 && len >= 2) ||
-                   (l == 4 && len >= 4) ||
-                   (l == 8 && len >= 8));
-#endif
-            stn_he_p(buf, l, val);
-        } else {
-            /* RAM case */
-            ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
-            memcpy(buf, ram_ptr, l);
-        }
-
-        if (release_lock) {
-            bql_unlock();
-            release_lock = false;
-        }
+        result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
 
         len -= l;
         buf += l;
@@ -2816,7 +2845,7 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
         }
 
         l = len;
-        mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+        mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
     }
 
     return result;
@@ -2827,16 +2856,16 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                  MemTxAttrs attrs, void *buf, hwaddr len)
 {
     hwaddr l;
-    hwaddr addr1;
+    hwaddr mr_addr;
     MemoryRegion *mr;
 
     l = len;
-    mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+    mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
     if (!flatview_access_allowed(mr, attrs, addr, len)) {
         return MEMTX_ACCESS_ERROR;
     }
     return flatview_read_continue(fv, addr, attrs, buf, len,
-                                  addr1, l, mr);
+                                  mr_addr, l, mr);
 }
 
 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
@@ -3341,6 +3370,59 @@ static inline MemoryRegion *address_space_translate_cached(
     return section.mr;
 }
 
+/* Called within RCU critical section. */
+static MemTxResult address_space_write_continue_cached(MemTxAttrs attrs,
+                                                        const void *ptr,
+                                                        hwaddr len,
+                                                        hwaddr mr_addr,
+                                                        hwaddr l,
+                                                        MemoryRegion *mr)
+{
+    MemTxResult result = MEMTX_OK;
+    const uint8_t *buf = ptr;
+
+    for (;;) {
+        result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
+                                               mr);
+
+        len -= l;
+        buf += l;
+        mr_addr += l;
+
+        if (!len) {
+            break;
+        }
+
+        l = len;
+    }
+
+    return result;
+}
+
+/* Called within RCU critical section. */
+static MemTxResult address_space_read_continue_cached(MemTxAttrs attrs,
+                                                      void *ptr, hwaddr len,
+                                                      hwaddr mr_addr, hwaddr l,
+                                                      MemoryRegion *mr)
+{
+    MemTxResult result = MEMTX_OK;
+    uint8_t *buf = ptr;
+
+    for (;;) {
+        result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
+        len -= l;
+        buf += l;
+        mr_addr += l;
+
+        if (!len) {
+            break;
+        }
+        l = len;
+    }
+
+    return result;
+}
+
 /* Called from RCU critical section. address_space_read_cached uses this
  * out of line function when the target is an MMIO or IOMMU region.
  */
@@ -3348,15 +3430,14 @@ MemTxResult address_space_read_cached_slow(MemoryRegionCache *cache,
                                            hwaddr addr, void *buf, hwaddr len)
 {
-    hwaddr addr1, l;
+    hwaddr mr_addr, l;
     MemoryRegion *mr;
 
     l = len;
-    mr = address_space_translate_cached(cache, addr, &addr1, &l, false,
+    mr = address_space_translate_cached(cache, addr, &mr_addr, &l, false,
                                         MEMTXATTRS_UNSPECIFIED);
-    return flatview_read_continue(cache->fv,
-                                  addr, MEMTXATTRS_UNSPECIFIED, buf, len,
-                                  addr1, l, mr);
+    return address_space_read_continue_cached(MEMTXATTRS_UNSPECIFIED,
+                                               buf, len, mr_addr, l, mr);
 }
 
 /* Called from RCU critical section. address_space_write_cached uses this
@@ -3366,15 +3447,14 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
                                             hwaddr addr, const void *buf,
                                             hwaddr len)
 {
-    hwaddr addr1, l;
+    hwaddr mr_addr, l;
     MemoryRegion *mr;
 
     l = len;
-    mr = address_space_translate_cached(cache, addr, &addr1, &l, true,
+    mr = address_space_translate_cached(cache, addr, &mr_addr, &l, true,
                                         MEMTXATTRS_UNSPECIFIED);
-    return flatview_write_continue(cache->fv,
-                                   addr, MEMTXATTRS_UNSPECIFIED, buf, len,
-                                   addr1, l, mr);
+    return address_space_write_continue_cached(MEMTXATTRS_UNSPECIFIED,
+                                                buf, len, mr_addr, l, mr);
 }
 
 #define ARG1_DECL                MemoryRegionCache *cache
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 09e07cab9b..c1243891c3 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -38,7 +38,6 @@
 #include "qemu/option_int.h"
 #include "sysemu/block-backend.h"
 #include "migration/misc.h"
-#include "migration/migration.h"
 #include "qemu/cutils.h"
 #include "hw/qdev-properties.h"
 #include "hw/clock.h"
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index c19978a970..11a69a3b4e 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -22,7 +22,6 @@
 #include "hw/irq.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
-#include "migration/migration.h"
 #include "sysemu/runstate.h"
 #include "cpu-csr.h"
 #include "kvm_loongarch.h"
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index c7afdb1e81..cda7d78a77 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -44,7 +44,7 @@
 #include "kvm_riscv.h"
 #include "sbi_ecall_interface.h"
 #include "chardev/char-fe.h"
-#include "migration/migration.h"
+#include "migration/misc.h"
 #include "sysemu/runstate.h"
 #include "hw/riscv/numa.h"
 
@@ -729,7 +729,7 @@ static void kvm_riscv_put_regs_timer(CPUState *cs)
      * frequency. Therefore, we should check whether they are the same here
      * during the migration.
      */
-    if (migration_is_running(migrate_get_current()->state)) {
+    if (migration_is_running()) {
         KVM_RISCV_GET_TIMER(cs, frequency, reg);
         if (reg != env->kvm_timer_frequency) {
             error_report("Dst Hosts timer frequency != Src Hosts");
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 4023d808f9..71895abb7f 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -2772,6 +2772,24 @@ test_migrate_precopy_tcp_multifd_start(QTestState *from,
 }
 
 static void *
+test_migrate_precopy_tcp_multifd_start_zero_page_legacy(QTestState *from,
+                                                        QTestState *to)
+{
+    test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+    migrate_set_parameter_str(from, "zero-page-detection", "legacy");
+    return NULL;
+}
+
+static void *
+test_migration_precopy_tcp_multifd_start_no_zero_page(QTestState *from,
+                                                      QTestState *to)
+{
+    test_migrate_precopy_tcp_multifd_start_common(from, to, "none");
+    migrate_set_parameter_str(from, "zero-page-detection", "none");
+    return NULL;
+}
+
+static void *
 test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from,
                                             QTestState *to)
 {
@@ -2812,6 +2830,36 @@ static void test_multifd_tcp_none(void)
     test_precopy_common(&args);
 }
 
+static void test_multifd_tcp_zero_page_legacy(void)
+{
+    MigrateCommon args = {
+        .listen_uri = "defer",
+        .start_hook = test_migrate_precopy_tcp_multifd_start_zero_page_legacy,
+        /*
+         * Multifd is more complicated than most of the features, it
+         * directly takes guest page buffers when sending, make sure
+         * everything will work alright even if guest page is changing.
+         */
+        .live = true,
+    };
+    test_precopy_common(&args);
+}
+
+static void test_multifd_tcp_no_zero_page(void)
+{
+    MigrateCommon args = {
+        .listen_uri = "defer",
+        .start_hook = test_migration_precopy_tcp_multifd_start_no_zero_page,
+        /*
+         * Multifd is more complicated than most of the features, it
+         * directly takes guest page buffers when sending, make sure
+         * everything will work alright even if guest page is changing.
+         */
+        .live = true,
+    };
+    test_precopy_common(&args);
+}
+
 static void test_multifd_tcp_zlib(void)
 {
     MigrateCommon args = {
@@ -3729,6 +3777,10 @@ int main(int argc, char **argv)
     }
     migration_test_add("/migration/multifd/tcp/plain/none",
                        test_multifd_tcp_none);
+    migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
+                       test_multifd_tcp_zero_page_legacy);
+    migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
+                       test_multifd_tcp_no_zero_page);
     migration_test_add("/migration/multifd/tcp/plain/cancel",
                        test_multifd_tcp_cancel);
     migration_test_add("/migration/multifd/tcp/plain/zlib",
diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c
index c4f9faa273..63f28f26f4 100644
--- a/tests/unit/test-vmstate.c
+++ b/tests/unit/test-vmstate.c
@@ -24,7 +24,6 @@
 #include "qemu/osdep.h"
 
-#include "../migration/migration.h"
 #include "migration/vmstate.h"
 #include "migration/qemu-file-types.h"
 #include "../migration/qemu-file.h"
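
Usage sketch (not part of the diff): given the zero-page-detection member added to MigrateSetParameters in qapi/migration.json above, a management application could select the pre-multifd behaviour over QMP before starting migration; the schema documents 'multifd' as the default, and 'none'/'legacy' as the other accepted values:

    { "execute": "migrate-set-parameters",
      "arguments": { "zero-page-detection": "legacy" } }

The qtest additions above drive the same parameter through migrate_set_parameter_str() with the "legacy" and "none" values.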