author     Peter Maydell <peter.maydell@linaro.org>   2017-04-21 15:59:27 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2017-04-21 15:59:27 +0100
commit     32c7e0ab755745e961f1772e95cac381cc68769d (patch)
tree       fa40ac2dd8de6e9d4dfa6607c41cc7ee03c3daf7
parent     af7ec403b213544667cbc275926fa23b54e0cb02 (diff)
parent     2c02468c9b1750aaa5fbeee49cdb55f101608c6a (diff)
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170421' into staging
migration/next for 20170421
# gpg: Signature made Fri 21 Apr 2017 11:28:13 BST
# gpg: using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg: aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723
* remotes/juanquintela/tags/migration/20170421: (65 commits)
hmp: info migrate_parameters format tunes
hmp: info migrate_capability format tunes
migration: rename max_size to threshold_size
migration: set current_active_state once
virtio-rng: stop virtqueue while the CPU is stopped
migration: don't close a file descriptor while it can be in use
ram: Remove migration_bitmap_extend()
migration: Disable hotplug/unplug during migration
qdev: Move qdev_unplug() to qdev-monitor.c
qdev: Export qdev_hot_removed
qdev: qdev_hotplug is really a bool
migration: Remove MigrationState parameter from migration_is_idle()
ram: Use RAMBitmap type for coherence
ram: rename last_ram_offset() last_ram_pages()
ram: Use ramblock and page offset instead of absolute offset
ram: Change offset field in PageSearchStatus to page
ram: Remember last_page instead of last_offset
ram: Use page number instead of an address for the bitmap operations
ram: reorganize last_sent_block
ram: ram_discard_range() don't use the mis parameter
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
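
The central theme of this series is folding the file-scope globals in migration/ram.c (the old AccountingInfo counters, bitmap_sync_count, dirty_rate_high_cnt, and similar) into a single RAMState structure that is passed to the helpers. The following is a minimal, compilable sketch of that pattern, not the actual QEMU code: the accessor names (ram_dirty_sync_count, ram_bytes_transferred) match functions added by the series, but the struct here is deliberately stripped down to a few illustrative fields.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the RAMState introduced in migration/ram.c:
 * per-migration counters live in one struct instead of scattered globals. */
struct RAMState {
    uint64_t bitmap_sync_count;   /* how many times the dirty bitmap was synced */
    uint64_t norm_pages;          /* normal (non-zero, non-xbzrle) pages sent */
    uint64_t zero_pages;          /* pages detected as all-zero */
    uint64_t bytes_transferred;   /* total bytes put on the wire */
};
typedef struct RAMState RAMState;

static RAMState ram_state;

/* Accessors like these let migration.c report statistics without
 * reaching into ram.c internals, replacing the old *_mig_*() globals. */
uint64_t ram_dirty_sync_count(void)
{
    return ram_state.bitmap_sync_count;
}

uint64_t ram_bytes_transferred(void)
{
    return ram_state.bytes_transferred;
}

int main(void)
{
    ram_state.bitmap_sync_count = 3;
    ram_state.bytes_transferred = 4096;
    printf("syncs=%llu bytes=%llu\n",
           (unsigned long long)ram_dirty_sync_count(),
           (unsigned long long)ram_bytes_transferred());
    return 0;
}

Keeping the state in one object also makes it clearer which fields the bitmap mutex and RCU protect, which is what the later patches in the series rely on.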
-rw-r--r--  exec.c                          |   11
-rw-r--r--  hmp.c                           |   29
-rw-r--r--  hw/core/qdev.c                  |   40
-rw-r--r--  hw/virtio/trace-events          |    3
-rw-r--r--  hw/virtio/virtio-rng.c          |   29
-rw-r--r--  include/exec/ram_addr.h         |    8
-rw-r--r--  include/hw/qdev-core.h          |    3
-rw-r--r--  include/hw/virtio/virtio-rng.h  |    2
-rw-r--r--  include/migration/migration.h   |   41
-rw-r--r--  include/migration/vmstate.h     |    3
-rw-r--r--  include/sysemu/sysemu.h         |    2
-rw-r--r--  migration/migration.c           |   72
-rw-r--r--  migration/postcopy-ram.c        |   14
-rw-r--r--  migration/ram.c                 | 1192
-rw-r--r--  migration/savevm.c              |   19
-rw-r--r--  migration/trace-events          |    4
-rw-r--r--  qapi-schema.json                |    5
-rw-r--r--  qdev-monitor.c                  |   45
18 files changed, 791 insertions(+), 731 deletions(-)
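
Several of the exec.c and postcopy hunks below switch interfaces from byte offsets to page counts: last_ram_offset() becomes last_ram_page(), and qemu_target_page_bits() becomes qemu_target_page_size(). Here is a small illustrative sketch of the two conversions involved; EXAMPLE_PAGE_BITS, addr_to_page() and pages_to_bytes() are made-up names for the example only, with a 4 KiB page assumed purely for illustration.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative only: QEMU derives this from TARGET_PAGE_BITS per target. */
#define EXAMPLE_PAGE_BITS 12
#define EXAMPLE_PAGE_SIZE ((size_t)1 << EXAMPLE_PAGE_BITS)

/* byte offset -> page number, as in last_ram_page() returning
 * "last >> TARGET_PAGE_BITS" instead of a raw ram_addr_t */
static uint64_t addr_to_page(uint64_t byte_offset)
{
    return byte_offset >> EXAMPLE_PAGE_BITS;
}

/* page count -> byte length, as in postcopy_discard_send_range()
 * multiplying by the target page size instead of shifting by its bits */
static uint64_t pages_to_bytes(uint64_t pages)
{
    return pages * EXAMPLE_PAGE_SIZE;
}

int main(void)
{
    printf("page of offset 0x201000 = %llu\n",
           (unsigned long long)addr_to_page(0x201000));
    printf("3 pages = %llu bytes\n",
           (unsigned long long)pages_to_bytes(3));
    return 0;
}

Working in whole pages keeps the bitmap code target-independent and avoids repeating the shift in every caller, which is why the series renames the helper rather than keeping the bit count around.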
@@ -1528,7 +1528,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) return offset; } -ram_addr_t last_ram_offset(void) +unsigned long last_ram_page(void) { RAMBlock *block; ram_addr_t last = 0; @@ -1538,7 +1538,7 @@ ram_addr_t last_ram_offset(void) last = MAX(last, block->offset + block->max_length); } rcu_read_unlock(); - return last; + return last >> TARGET_PAGE_BITS; } static void qemu_ram_setup_dump(void *addr, ram_addr_t size) @@ -1727,7 +1727,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_addr_t old_ram_size, new_ram_size; Error *err = NULL; - old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; + old_ram_size = last_ram_page(); qemu_mutex_lock_ramlist(); new_block->offset = find_ram_offset(new_block->max_length); @@ -1758,7 +1758,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) new_ram_size = MAX(old_ram_size, (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); if (new_ram_size > old_ram_size) { - migration_bitmap_extend(old_ram_size, new_ram_size); dirty_memory_extend(old_ram_size, new_ram_size); } /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, @@ -3307,9 +3306,9 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, * Allows code that needs to deal with migration bitmaps etc to still be built * target independent. */ -size_t qemu_target_page_bits(void) +size_t qemu_target_page_size(void) { - return TARGET_PAGE_BITS; + return TARGET_PAGE_SIZE; } #endif @@ -215,6 +215,9 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->ram->normal_bytes >> 10); monitor_printf(mon, "dirty sync count: %" PRIu64 "\n", info->ram->dirty_sync_count); + monitor_printf(mon, "page size: %" PRIu64 " kbytes\n", + info->ram->page_size >> 10); + if (info->ram->dirty_pages_rate) { monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", info->ram->dirty_pages_rate); @@ -265,13 +268,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict) caps = qmp_query_migrate_capabilities(NULL); if (caps) { - monitor_printf(mon, "capabilities: "); for (cap = caps; cap; cap = cap->next) { - monitor_printf(mon, "%s: %s ", + monitor_printf(mon, "%s: %s\n", MigrationCapability_lookup[cap->value->capability], cap->value->state ? 
"on" : "off"); } - monitor_printf(mon, "\n"); } qapi_free_MigrationCapabilityStatusList(caps); @@ -284,46 +285,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) params = qmp_query_migrate_parameters(NULL); if (params) { - monitor_printf(mon, "parameters:"); assert(params->has_compress_level); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL], params->compress_level); assert(params->has_compress_threads); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS], params->compress_threads); assert(params->has_decompress_threads); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); assert(params->has_cpu_throttle_initial); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL], params->cpu_throttle_initial); assert(params->has_cpu_throttle_increment); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT], params->cpu_throttle_increment); - monitor_printf(mon, " %s: '%s'", + monitor_printf(mon, "%s: '%s'\n", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS], params->has_tls_creds ? params->tls_creds : ""); - monitor_printf(mon, " %s: '%s'", + monitor_printf(mon, "%s: '%s'\n", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME], params->has_tls_hostname ? params->tls_hostname : ""); assert(params->has_max_bandwidth); - monitor_printf(mon, " %s: %" PRId64 " bytes/second", + monitor_printf(mon, "%s: %" PRId64 " bytes/second\n", MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH], params->max_bandwidth); assert(params->has_downtime_limit); - monitor_printf(mon, " %s: %" PRId64 " milliseconds", + monitor_printf(mon, "%s: %" PRId64 " milliseconds\n", MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT], params->downtime_limit); assert(params->has_x_checkpoint_delay); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY], params->x_checkpoint_delay); - monitor_printf(mon, "\n"); } qapi_free_MigrationParameters(params); diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 695d7c4216..02b632f6b3 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -39,9 +39,9 @@ #include "qapi-event.h" #include "migration/migration.h" -int qdev_hotplug = 0; +bool qdev_hotplug = false; static bool qdev_hot_added = false; -static bool qdev_hot_removed = false; +bool qdev_hot_removed = false; const VMStateDescription *qdev_get_vmsd(DeviceState *dev) { @@ -271,40 +271,6 @@ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) return hotplug_ctrl; } -void qdev_unplug(DeviceState *dev, Error **errp) -{ - DeviceClass *dc = DEVICE_GET_CLASS(dev); - HotplugHandler *hotplug_ctrl; - HotplugHandlerClass *hdc; - - if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); - return; - } - - if (!dc->hotpluggable) { - error_setg(errp, QERR_DEVICE_NO_HOTPLUG, - object_get_typename(OBJECT(dev))); - return; - } - - qdev_hot_removed = true; - - hotplug_ctrl = qdev_get_hotplug_handler(dev); - /* hotpluggable device MUST have 
HotplugHandler, if it doesn't - * then something is very wrong with it */ - g_assert(hotplug_ctrl); - - /* If device supports async unplug just request it to be done, - * otherwise just remove it synchronously */ - hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); - if (hdc->unplug_request) { - hotplug_handler_unplug_request(hotplug_ctrl, dev, errp); - } else { - hotplug_handler_unplug(hotplug_ctrl, dev, errp); - } -} - static int qdev_reset_one(DeviceState *dev, void *opaque) { device_reset(dev); @@ -385,7 +351,7 @@ void qdev_machine_creation_done(void) * ok, initial machine setup is done, starting from now we can * only create hotpluggable devices */ - qdev_hotplug = 1; + qdev_hotplug = true; } bool qdev_machine_modified(void) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 6926eedd3f..1f7a7c1ae1 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -11,8 +11,11 @@ virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" # hw/virtio/virtio-rng.c virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready" +virtio_rng_cpu_is_stopped(void *rng, int size) "rng %p: cpu is stopped, dropping %d bytes" +virtio_rng_popped(void *rng) "rng %p: elem popped" virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed" virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left" +virtio_rng_vm_state_change(void *rng, int running, int state) "rng %p: state change to running %d state %d" # hw/virtio/virtio-balloon.c # diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index 9639f4e89b..a6ee501051 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -53,6 +53,15 @@ static void chr_read(void *opaque, const void *buf, size_t size) return; } + /* we can't modify the virtqueue until + * our state is fully synced + */ + + if (!runstate_check(RUN_STATE_RUNNING)) { + trace_virtio_rng_cpu_is_stopped(vrng, size); + return; + } + vrng->quota_remaining -= size; offset = 0; @@ -61,6 +70,7 @@ static void chr_read(void *opaque, const void *buf, size_t size) if (!elem) { break; } + trace_virtio_rng_popped(vrng); len = iov_from_buf(elem->in_sg, elem->in_num, 0, buf + offset, size - offset); offset += len; @@ -120,17 +130,21 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) return f; } -static int virtio_rng_post_load(void *opaque, int version_id) +static void virtio_rng_vm_state_change(void *opaque, int running, + RunState state) { VirtIORNG *vrng = opaque; + trace_virtio_rng_vm_state_change(vrng, running, state); + /* We may have an element ready but couldn't process it due to a quota - * limit. Make sure to try again after live migration when the quota may - * have been reset. + * limit or because CPU was stopped. Make sure to try again when the + * CPU restart. 
*/ - virtio_rng_process(vrng); - return 0; + if (running && is_guest_ready(vrng)) { + virtio_rng_process(vrng); + } } static void check_rate_limit(void *opaque) @@ -198,6 +212,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, check_rate_limit, vrng); vrng->activate_timer = true; + + vrng->vmstate = qemu_add_vm_change_state_handler(virtio_rng_vm_state_change, + vrng); } static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) @@ -205,6 +222,7 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIORNG *vrng = VIRTIO_RNG(dev); + qemu_del_vm_change_state_handler(vrng->vmstate); timer_del(vrng->rate_limit_timer); timer_free(vrng->rate_limit_timer); virtio_cleanup(vdev); @@ -218,7 +236,6 @@ static const VMStateDescription vmstate_virtio_rng = { VMSTATE_VIRTIO_DEVICE, VMSTATE_END_OF_LIST() }, - .post_load = virtio_rng_post_load, }; static Property virtio_rng_properties[] = { diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index b05dc84ab9..c9ddcd0880 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -53,7 +53,7 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) } long qemu_getrampagesize(void); -ram_addr_t last_ram_offset(void); +unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, bool share, const char *mem_path, Error **errp); @@ -354,11 +354,13 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, static inline uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, + RAMBlock *rb, ram_addr_t start, ram_addr_t length, - int64_t *real_dirty_pages) + uint64_t *real_dirty_pages) { ram_addr_t addr; + start = rb->offset + start; unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); uint64_t num_dirty = 0; @@ -411,7 +413,5 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, return num_dirty; } - -void migration_bitmap_extend(ram_addr_t old, ram_addr_t new); #endif #endif diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index ac682a6818..4bf86b0ad8 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -373,7 +373,8 @@ Object *qdev_get_machine(void); /* FIXME: make this a link<> */ void qdev_set_parent_bus(DeviceState *dev, BusState *bus); -extern int qdev_hotplug; +extern bool qdev_hotplug; +extern bool qdev_hot_removed; char *qdev_get_dev_path(DeviceState *dev); diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h index 2d40abdbdb..922dce7cac 100644 --- a/include/hw/virtio/virtio-rng.h +++ b/include/hw/virtio/virtio-rng.h @@ -45,6 +45,8 @@ typedef struct VirtIORNG { QEMUTimer *rate_limit_timer; int64_t quota_remaining; bool activate_timer; + + VMChangeStateEntry *vmstate; } VirtIORNG; #endif diff --git a/include/migration/migration.h b/include/migration/migration.h index 5720c884f4..ba1a16cbc1 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -128,18 +128,6 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); -/* - * An outstanding page request, on the source, having been received - * and queued - */ -struct MigrationSrcPageRequest { - RAMBlock *rb; - hwaddr offset; - hwaddr len; - - QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req; -}; - struct MigrationState { size_t bytes_xfer; @@ -166,14 +154,9 @@ 
struct MigrationState int64_t total_time; int64_t downtime; int64_t expected_downtime; - int64_t dirty_pages_rate; - int64_t dirty_bytes_rate; bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; int64_t xbzrle_cache_size; int64_t setup_time; - int64_t dirty_sync_count; - /* Count of requests incoming from destination */ - int64_t postcopy_requests; /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; @@ -186,11 +169,6 @@ struct MigrationState /* Flag set once the migration thread called bdrv_inactivate_all */ bool block_inactive; - /* Queue of outstanding page requests from the destination */ - QemuMutex src_page_req_mutex; - QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests; - /* The RAMBlock used in the last src_page_request */ - RAMBlock *last_req_rb; /* The semaphore is used to notify COLO thread that failover is finished */ QemuSemaphore colo_exit_sem; @@ -256,11 +234,11 @@ void remove_migration_state_change_notifier(Notifier *notify); MigrationState *migrate_init(const MigrationParams *params); bool migration_is_blocked(Error **errp); bool migration_in_setup(MigrationState *); -bool migration_is_idle(MigrationState *s); +bool migration_is_idle(void); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); /* True if outgoing migration has entered postcopy phase */ -bool migration_in_postcopy(MigrationState *); +bool migration_in_postcopy(void); /* ...and after the device transmission */ bool migration_in_postcopy_after_devices(MigrationState *); MigrationState *migrate_get_current(void); @@ -272,15 +250,14 @@ void migrate_decompress_threads_join(void); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_transferred(void); uint64_t ram_bytes_total(void); +uint64_t ram_dirty_sync_count(void); +uint64_t ram_dirty_pages_rate(void); +uint64_t ram_postcopy_requests(void); void free_xbzrle_decoded_buf(void); void acct_update_position(QEMUFile *f, size_t size, bool zero); -uint64_t dup_mig_bytes_transferred(void); uint64_t dup_mig_pages_transferred(void); -uint64_t skipped_mig_bytes_transferred(void); -uint64_t skipped_mig_pages_transferred(void); -uint64_t norm_mig_bytes_transferred(void); uint64_t norm_mig_pages_transferred(void); uint64_t xbzrle_mig_bytes_transferred(void); uint64_t xbzrle_mig_pages_transferred(void); @@ -293,8 +270,7 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected); /* For outgoing discard bitmap */ int ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ -int ram_discard_range(MigrationIncomingState *mis, const char *block_name, - uint64_t start, size_t length); +int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); void ram_postcopy_migrated_memory_release(MigrationState *ms); @@ -377,9 +353,8 @@ void savevm_skip_configuration(void); int global_state_store(void); void global_state_store_running(void); -void flush_page_queue(MigrationState *ms); -int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len); +void migration_page_queue_free(void); +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); uint64_t ram_pagesize_summary(void); PostcopyState postcopy_state_get(void); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index f2dbf8410a..dad3984c07 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -56,7 +56,8 @@ 
typedef struct SaveVMHandlers { /* This runs outside the iothread lock! */ int (*save_live_setup)(QEMUFile *f, void *opaque); - void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size, + void (*save_live_pending)(QEMUFile *f, void *opaque, + uint64_t threshold_size, uint64_t *non_postcopiable_pending, uint64_t *postcopiable_pending); LoadStateHandler *load_state; diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 576c7ce640..16175f7295 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -67,7 +67,7 @@ int qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); void qemu_system_reset(bool report); void qemu_system_guest_panicked(GuestPanicInformation *info); -size_t qemu_target_page_bits(void); +size_t qemu_target_page_size(void); void qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); diff --git a/migration/migration.c b/migration/migration.c index ad4036fdb1..353f2728cf 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -109,7 +109,6 @@ MigrationState *migrate_get_current(void) }; if (!once) { - qemu_mutex_init(¤t_migration.src_page_req_mutex); current_migration.parameters.tls_creds = g_strdup(""); current_migration.parameters.tls_hostname = g_strdup(""); once = true; @@ -436,9 +435,6 @@ static void process_incoming_migration_co(void *opaque) qemu_thread_join(&mis->colo_incoming_thread); } - qemu_fclose(f); - free_xbzrle_decoded_buf(); - if (ret < 0) { migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); @@ -447,6 +443,9 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } + qemu_fclose(f); + free_xbzrle_decoded_buf(); + mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); qemu_bh_schedule(mis->bh); } @@ -651,16 +650,19 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->transferred = ram_bytes_transferred(); info->ram->total = ram_bytes_total(); info->ram->duplicate = dup_mig_pages_transferred(); - info->ram->skipped = skipped_mig_pages_transferred(); + /* legacy value. 
It is not used anymore */ + info->ram->skipped = 0; info->ram->normal = norm_mig_pages_transferred(); - info->ram->normal_bytes = norm_mig_bytes_transferred(); + info->ram->normal_bytes = norm_mig_pages_transferred() * + qemu_target_page_size(); info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = s->dirty_sync_count; - info->ram->postcopy_requests = s->postcopy_requests; + info->ram->dirty_sync_count = ram_dirty_sync_count(); + info->ram->postcopy_requests = ram_postcopy_requests(); + info->ram->page_size = qemu_target_page_size(); if (s->state != MIGRATION_STATUS_COMPLETED) { info->ram->remaining = ram_bytes_remaining(); - info->ram->dirty_pages_rate = s->dirty_pages_rate; + info->ram->dirty_pages_rate = ram_dirty_pages_rate(); } } @@ -955,7 +957,7 @@ static void migrate_fd_cleanup(void *opaque) qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; - flush_page_queue(s); + migration_page_queue_free(); if (s->to_dst_file) { trace_migrate_fd_cleanup(); @@ -1061,21 +1063,21 @@ bool migration_has_failed(MigrationState *s) s->state == MIGRATION_STATUS_FAILED); } -bool migration_in_postcopy(MigrationState *s) +bool migration_in_postcopy(void) { + MigrationState *s = migrate_get_current(); + return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); } bool migration_in_postcopy_after_devices(MigrationState *s) { - return migration_in_postcopy(s) && s->postcopy_after_devices; + return migration_in_postcopy() && s->postcopy_after_devices; } -bool migration_is_idle(MigrationState *s) +bool migration_is_idle(void) { - if (!s) { - s = migrate_get_current(); - } + MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: @@ -1116,22 +1118,15 @@ MigrationState *migrate_init(const MigrationParams *params) s->mbps = 0.0; s->downtime = 0; s->expected_downtime = 0; - s->dirty_pages_rate = 0; - s->dirty_bytes_rate = 0; s->setup_time = 0; - s->dirty_sync_count = 0; s->start_postcopy = false; s->postcopy_after_devices = false; - s->postcopy_requests = 0; s->migration_thread_running = false; - s->last_req_rb = NULL; error_free(s->error); s->error = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); - QSIMPLEQ_INIT(&s->src_page_requests); - s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); return s; } @@ -1147,7 +1142,7 @@ int migrate_add_blocker(Error *reason, Error **errp) return -EACCES; } - if (migration_is_idle(NULL)) { + if (migration_is_idle()) { migration_blockers = g_slist_prepend(migration_blockers, reason); return 0; } @@ -1485,7 +1480,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, return; } - if (ram_save_queue_pages(ms, rbname, start, len)) { + if (ram_save_queue_pages(rbname, start, len)) { mark_source_rp_bad(ms); } } @@ -1915,7 +1910,12 @@ static void *migration_thread(void *opaque) int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); int64_t initial_bytes = 0; - int64_t max_size = 0; + /* + * The final stage happens when the remaining data is smaller than + * this threshold; it's calculated from the requested downtime and + * measured bandwidth + */ + int64_t threshold_size = 0; int64_t start_time = initial_time; int64_t end_time; bool old_vm_running = false; @@ -1946,7 +1946,6 @@ static void *migration_thread(void *opaque) qemu_savevm_state_begin(s->to_dst_file, &s->params); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; - current_active_state = MIGRATION_STATUS_ACTIVE; migrate_set_state(&s->state, 
MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -1960,17 +1959,17 @@ static void *migration_thread(void *opaque) if (!qemu_file_rate_limit(s->to_dst_file)) { uint64_t pend_post, pend_nonpost; - qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_nonpost, - &pend_post); + qemu_savevm_state_pending(s->to_dst_file, threshold_size, + &pend_nonpost, &pend_post); pending_size = pend_nonpost + pend_post; - trace_migrate_pending(pending_size, max_size, + trace_migrate_pending(pending_size, threshold_size, pend_post, pend_nonpost); - if (pending_size && pending_size >= max_size) { + if (pending_size && pending_size >= threshold_size) { /* Still a significant amount to transfer */ if (migrate_postcopy_ram() && s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && - pend_nonpost <= max_size && + pend_nonpost <= threshold_size && atomic_read(&s->start_postcopy)) { if (!postcopy_start(s, &old_vm_running)) { @@ -2002,17 +2001,18 @@ static void *migration_thread(void *opaque) initial_bytes; uint64_t time_spent = current_time - initial_time; double bandwidth = (double)transferred_bytes / time_spent; - max_size = bandwidth * s->parameters.downtime_limit; + threshold_size = bandwidth * s->parameters.downtime_limit; s->mbps = (((double) transferred_bytes * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; trace_migrate_transferred(transferred_bytes, time_spent, - bandwidth, max_size); + bandwidth, threshold_size); /* if we haven't sent anything, we don't want to recalculate 10000 is a small enough number for our purposes */ - if (s->dirty_bytes_rate && transferred_bytes > 10000) { - s->expected_downtime = s->dirty_bytes_rate / bandwidth; + if (ram_dirty_pages_rate() && transferred_bytes > 10000) { + s->expected_downtime = ram_dirty_pages_rate() * + qemu_target_page_size() / bandwidth; } qemu_file_reset_rate_limit(s->to_dst_file); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index dc80dbb67f..85fd8d72b3 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -123,7 +123,7 @@ bool postcopy_ram_supported_by_host(void) struct uffdio_range range_struct; uint64_t feature_mask; - if ((1ul << qemu_target_page_bits()) > pagesize) { + if (qemu_target_page_size() > pagesize) { error_report("Target page size bigger than host page size"); goto out; } @@ -213,8 +213,6 @@ out: static int init_range(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque) { - MigrationIncomingState *mis = opaque; - trace_postcopy_init_range(block_name, host_addr, offset, length); /* @@ -223,7 +221,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. 
* (Precopy will just overwrite this data, so doesn't need the discard) */ - if (ram_discard_range(mis, block_name, 0, length)) { + if (ram_discard_range(block_name, 0, length)) { return -1; } @@ -271,7 +269,7 @@ static int cleanup_range(const char *block_name, void *host_addr, */ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) { - if (qemu_ram_foreach_block(init_range, mis)) { + if (qemu_ram_foreach_block(init_range, NULL)) { return -1; } @@ -745,10 +743,10 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, unsigned long start, unsigned long length) { - size_t tp_bits = qemu_target_page_bits(); + size_t tp_size = qemu_target_page_size(); /* Convert to byte offsets within the RAM block */ - pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits; - pds->length_list[pds->cur_entry] = length << tp_bits; + pds->start_list[pds->cur_entry] = (start - pds->offset) * tp_size; + pds->length_list[pds->cur_entry] = length * tp_size; trace_postcopy_discard_send_range(pds->ramblock_name, start, length); pds->cur_entry++; pds->nsentwords++; diff --git a/migration/ram.c b/migration/ram.c index de1e0a3b18..f48664ec62 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -45,10 +45,6 @@ #include "qemu/rcu_queue.h" #include "migration/colo.h" -static int dirty_rate_high_cnt; - -static uint64_t bitmap_sync_count; - /***********************************************************/ /* ram save/restore */ @@ -96,11 +92,17 @@ static void XBZRLE_cache_unlock(void) qemu_mutex_unlock(&XBZRLE.lock); } -/* - * called from qmp_migrate_set_cache_size in main thread, possibly while - * a migration is in progress. - * A running migration maybe using the cache and might finish during this - * call, hence changes to the cache are protected by XBZRLE.lock(). +/** + * xbzrle_cache_resize: resize the xbzrle cache + * + * This function is called from qmp_migrate_set_cache_size in main + * thread, possibly while a migration is in progress. A running + * migration may be using the cache and might finish during this call, + * hence changes to the cache are protected by XBZRLE.lock(). + * + * Returns the new_size or negative in case of error. 
+ * + * @new_size: new cache size */ int64_t xbzrle_cache_resize(int64_t new_size) { @@ -136,115 +138,171 @@ out: return ret; } -/* accounting for migration statistics */ -typedef struct AccountingInfo { - uint64_t dup_pages; - uint64_t skipped_pages; +struct RAMBitmap { + struct rcu_head rcu; + /* Main migration bitmap */ + unsigned long *bmap; + /* bitmap of pages that haven't been sent even once + * only maintained and used in postcopy at the moment + * where it's used to send the dirtymap at the start + * of the postcopy phase + */ + unsigned long *unsentmap; +}; +typedef struct RAMBitmap RAMBitmap; + +/* + * An outstanding page request, on the source, having been received + * and queued + */ +struct RAMSrcPageRequest { + RAMBlock *rb; + hwaddr offset; + hwaddr len; + + QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; +}; + +/* State of RAM for migration */ +struct RAMState { + /* QEMUFile used for this migration */ + QEMUFile *f; + /* Last block that we have visited searching for dirty pages */ + RAMBlock *last_seen_block; + /* Last block from where we have sent data */ + RAMBlock *last_sent_block; + /* Last dirty target page we have sent */ + ram_addr_t last_page; + /* last ram version we have seen */ + uint32_t last_version; + /* We are in the first round */ + bool ram_bulk_stage; + /* How many times we have dirty too many pages */ + int dirty_rate_high_cnt; + /* How many times we have synchronized the bitmap */ + uint64_t bitmap_sync_count; + /* these variables are used for bitmap sync */ + /* last time we did a full bitmap_sync */ + int64_t time_last_bitmap_sync; + /* bytes transferred at start_time */ + uint64_t bytes_xfer_prev; + /* number of dirty pages since start_time */ + uint64_t num_dirty_pages_period; + /* xbzrle misses since the beginning of the period */ + uint64_t xbzrle_cache_miss_prev; + /* number of iterations at the beginning of period */ + uint64_t iterations_prev; + /* Accounting fields */ + /* number of zero pages. It used to be pages filled by the same char. */ + uint64_t zero_pages; + /* number of normal transferred pages */ uint64_t norm_pages; + /* Iterations since start */ uint64_t iterations; + /* xbzrle transmitted bytes. 
Notice that this is with + * compression, they can't be calculated from the pages */ uint64_t xbzrle_bytes; + /* xbzrle transmmited pages */ uint64_t xbzrle_pages; + /* xbzrle number of cache miss */ uint64_t xbzrle_cache_miss; + /* xbzrle miss rate */ double xbzrle_cache_miss_rate; + /* xbzrle number of overflows */ uint64_t xbzrle_overflows; -} AccountingInfo; + /* number of dirty bits in the bitmap */ + uint64_t migration_dirty_pages; + /* total number of bytes transferred */ + uint64_t bytes_transferred; + /* number of dirtied pages in the last second */ + uint64_t dirty_pages_rate; + /* Count of requests incoming from destination */ + uint64_t postcopy_requests; + /* protects modification of the bitmap */ + QemuMutex bitmap_mutex; + /* Ram Bitmap protected by RCU */ + RAMBitmap *ram_bitmap; + /* The RAMBlock used in the last src_page_requests */ + RAMBlock *last_req_rb; + /* Queue of outstanding page requests from the destination */ + QemuMutex src_page_req_mutex; + QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; +}; +typedef struct RAMState RAMState; -static AccountingInfo acct_info; +static RAMState ram_state; -static void acct_clear(void) +uint64_t dup_mig_pages_transferred(void) { - memset(&acct_info, 0, sizeof(acct_info)); + return ram_state.zero_pages; } -uint64_t dup_mig_bytes_transferred(void) +uint64_t norm_mig_pages_transferred(void) { - return acct_info.dup_pages * TARGET_PAGE_SIZE; + return ram_state.norm_pages; } -uint64_t dup_mig_pages_transferred(void) +uint64_t xbzrle_mig_bytes_transferred(void) { - return acct_info.dup_pages; + return ram_state.xbzrle_bytes; } -uint64_t skipped_mig_bytes_transferred(void) +uint64_t xbzrle_mig_pages_transferred(void) { - return acct_info.skipped_pages * TARGET_PAGE_SIZE; + return ram_state.xbzrle_pages; } -uint64_t skipped_mig_pages_transferred(void) +uint64_t xbzrle_mig_pages_cache_miss(void) { - return acct_info.skipped_pages; + return ram_state.xbzrle_cache_miss; } -uint64_t norm_mig_bytes_transferred(void) +double xbzrle_mig_cache_miss_rate(void) { - return acct_info.norm_pages * TARGET_PAGE_SIZE; + return ram_state.xbzrle_cache_miss_rate; } -uint64_t norm_mig_pages_transferred(void) +uint64_t xbzrle_mig_pages_overflow(void) { - return acct_info.norm_pages; + return ram_state.xbzrle_overflows; } -uint64_t xbzrle_mig_bytes_transferred(void) +uint64_t ram_bytes_transferred(void) { - return acct_info.xbzrle_bytes; + return ram_state.bytes_transferred; } -uint64_t xbzrle_mig_pages_transferred(void) +uint64_t ram_bytes_remaining(void) { - return acct_info.xbzrle_pages; + return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE; } -uint64_t xbzrle_mig_pages_cache_miss(void) +uint64_t ram_dirty_sync_count(void) { - return acct_info.xbzrle_cache_miss; + return ram_state.bitmap_sync_count; } -double xbzrle_mig_cache_miss_rate(void) +uint64_t ram_dirty_pages_rate(void) { - return acct_info.xbzrle_cache_miss_rate; + return ram_state.dirty_pages_rate; } -uint64_t xbzrle_mig_pages_overflow(void) +uint64_t ram_postcopy_requests(void) { - return acct_info.xbzrle_overflows; + return ram_state.postcopy_requests; } -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static QemuMutex migration_bitmap_mutex; -static uint64_t migration_dirty_pages; -static uint32_t last_version; -static bool ram_bulk_stage; - /* used by the search for pages to 
send */ struct PageSearchStatus { /* Current block being searched */ RAMBlock *block; - /* Current offset to search from */ - ram_addr_t offset; + /* Current page to search from */ + unsigned long page; /* Set once we wrap around */ bool complete_round; }; typedef struct PageSearchStatus PageSearchStatus; -static struct BitmapRcu { - struct rcu_head rcu; - /* Main migration bitmap */ - unsigned long *bmap; - /* bitmap of pages that haven't been sent even once - * only maintained and used in postcopy at the moment - * where it's used to send the dirtymap at the start - * of the postcopy phase - */ - unsigned long *unsentmap; -} *migration_bitmap_rcu; - struct CompressParam { bool done; bool quit; @@ -278,7 +336,6 @@ static QemuCond comp_done_cond; /* The empty QEMUFileOps will be used by file in CompressParam */ static const QEMUFileOps empty_ops = { }; -static bool compression_switch; static DecompressParam *decomp_param; static QemuThread *decompress_threads; static QemuMutex decomp_done_lock; @@ -323,6 +380,7 @@ static inline void terminate_compression_threads(void) int idx, thread_count; thread_count = migrate_compress_threads(); + for (idx = 0; idx < thread_count; idx++) { qemu_mutex_lock(&comp_param[idx].mutex); comp_param[idx].quit = true; @@ -361,7 +419,6 @@ void migrate_compress_threads_create(void) if (!migrate_use_compression()) { return; } - compression_switch = true; thread_count = migrate_compress_threads(); compress_threads = g_new0(QemuThread, thread_count); comp_param = g_new0(CompressParam, thread_count); @@ -383,38 +440,45 @@ void migrate_compress_threads_create(void) } /** - * save_page_header: Write page header to wire + * save_page_header: write page header to wire * * If this is the 1st block, it also writes the block identification * - * Returns: Number of bytes written + * Returns the number of bytes written * * @f: QEMUFile where to send the data * @block: block that contains the page we want to send * @offset: offset inside the block for the page * in the lower bits, it contains flags */ -static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) +static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset) { size_t size, len; - qemu_put_be64(f, offset); + if (block == rs->last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + qemu_put_be64(rs->f, offset); size = 8; if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { len = strlen(block->idstr); - qemu_put_byte(f, len); - qemu_put_buffer(f, (uint8_t *)block->idstr, len); + qemu_put_byte(rs->f, len); + qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len); size += 1 + len; + rs->last_sent_block = block; } return size; } -/* Reduce amount of guest cpu execution to hopefully slow down memory writes. - * If guest dirty memory rate is reduced below the rate at which we can - * transfer pages to the destination then we should be able to complete - * migration. Some workloads dirty memory way too fast and will not effectively - * converge, even with auto-converge. +/** + * mig_throttle_guest_down: throotle down the guest + * + * Reduce amount of guest cpu execution to hopefully slow down memory + * writes. If guest dirty memory rate is reduced below the rate at + * which we can transfer pages to the destination then we should be + * able to complete migration. Some workloads dirty memory way too + * fast and will not effectively converge, even with auto-converge. 
*/ static void mig_throttle_guest_down(void) { @@ -431,22 +495,28 @@ static void mig_throttle_guest_down(void) } } -/* Update the xbzrle cache to reflect a page that's been sent as all 0. +/** + * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache + * + * @rs: current RAM state + * @current_addr: address for the zero page + * + * Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. * As a bonus, if the page wasn't in the cache it gets added so that - * when a small write is made into the 0'd page it gets XBZRLE sent + * when a small write is made into the 0'd page it gets XBZRLE sent. */ -static void xbzrle_cache_zero_page(ram_addr_t current_addr) +static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) { - if (ram_bulk_stage || !migrate_use_xbzrle()) { + if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { return; } /* We don't care if this fails to allocate a new cache page * as long as it updated an old one */ cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, - bitmap_sync_count); + rs->bitmap_sync_count); } #define ENCODING_FLAG_XBZRLE 0x1 @@ -458,27 +528,25 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) * 0 means that page is identical to the one already sent * -1 means that xbzrle would be longer than normal * - * @f: QEMUFile where to send the data - * @current_data: - * @current_addr: + * @rs: current RAM state + * @current_data: pointer to the address of the page contents + * @current_addr: addr of the page * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, +static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, bool last_stage, - uint64_t *bytes_transferred) + ram_addr_t offset, bool last_stage) { int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; - if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { - acct_info.xbzrle_cache_miss++; + if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) { + rs->xbzrle_cache_miss++; if (!last_stage) { if (cache_insert(XBZRLE.cache, current_addr, *current_data, - bitmap_sync_count) == -1) { + rs->bitmap_sync_count) == -1) { return -1; } else { /* update *current_data when the page has been @@ -503,7 +571,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 0; } else if (encoded_len == -1) { trace_save_xbzrle_page_overflow(); - acct_info.xbzrle_overflows++; + rs->xbzrle_overflows++; /* update data in the cache */ if (!last_stage) { memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); @@ -518,92 +586,86 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(f, encoded_len); - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_xbzrle = save_page_header(rs, block, + offset | RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(rs->f, encoded_len); + qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); bytes_xbzrle += encoded_len + 1 + 2; - acct_info.xbzrle_pages++; - 
acct_info.xbzrle_bytes += bytes_xbzrle; - *bytes_transferred += bytes_xbzrle; + rs->xbzrle_pages++; + rs->xbzrle_bytes += bytes_xbzrle; + rs->bytes_transferred += bytes_xbzrle; return 1; } -/* Called with rcu_read_lock() to protect migration_bitmap - * rb: The RAMBlock to search for dirty pages in - * start: Start address (typically so we can continue from previous page) - * ram_addr_abs: Pointer into which to store the address of the dirty page - * within the global ram_addr space +/** + * migration_bitmap_find_dirty: find the next dirty page from start + * + * Called with rcu_read_lock() to protect migration_bitmap * - * Returns: byte offset within memory region of the start of a dirty page + * Returns the byte offset within memory region of the start of a dirty page + * + * @rs: current RAM state + * @rb: RAMBlock where to search for dirty pages + * @start: page where we start the search */ static inline -ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, - ram_addr_t start, - ram_addr_t *ram_addr_abs) +unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, + unsigned long start) { unsigned long base = rb->offset >> TARGET_PAGE_BITS; - unsigned long nr = base + (start >> TARGET_PAGE_BITS); + unsigned long nr = base + start; uint64_t rb_size = rb->used_length; unsigned long size = base + (rb_size >> TARGET_PAGE_BITS); unsigned long *bitmap; unsigned long next; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - if (ram_bulk_stage && nr > base) { + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + if (rs->ram_bulk_stage && nr > base) { next = nr + 1; } else { next = find_next_bit(bitmap, size, nr); } - *ram_addr_abs = next << TARGET_PAGE_BITS; - return (next - base) << TARGET_PAGE_BITS; + return next - base; } -static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) +static inline bool migration_bitmap_clear_dirty(RAMState *rs, + RAMBlock *rb, + unsigned long page) { bool ret; - int nr = addr >> TARGET_PAGE_BITS; - unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + unsigned long nr = (rb->offset >> TARGET_PAGE_BITS) + page; ret = test_and_clear_bit(nr, bitmap); if (ret) { - migration_dirty_pages--; + rs->migration_dirty_pages--; } return ret; } -static int64_t num_dirty_pages_period; -static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) +static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, + ram_addr_t start, ram_addr_t length) { unsigned long *bitmap; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap, - start, length, &num_dirty_pages_period); -} - -/* Fix me: there are too many global variables used in migration process. */ -static int64_t start_time; -static int64_t bytes_xfer_prev; -static uint64_t xbzrle_cache_miss_prev; -static uint64_t iterations_prev; - -static void migration_bitmap_sync_init(void) -{ - start_time = 0; - bytes_xfer_prev = 0; - num_dirty_pages_period = 0; - xbzrle_cache_miss_prev = 0; - iterations_prev = 0; + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + rs->migration_dirty_pages += + cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length, + &rs->num_dirty_pages_period); } -/* Returns a summary bitmap of the page sizes of all RAMBlocks; - * for VMs with just normal pages this is equivalent to the - * host page size. If it's got some huge pages then it's the OR - * of all the different page sizes. 
+/** + * ram_pagesize_summary: calculate all the pagesizes of a VM + * + * Returns a summary bitmap of the page sizes of all RAMBlocks + * + * For VMs with just normal pages this is equivalent to the host page + * size. If it's got some huge pages then it's the OR of all the + * different page sizes. */ uint64_t ram_pagesize_summary(void) { @@ -617,40 +679,39 @@ uint64_t ram_pagesize_summary(void) return summary; } -static void migration_bitmap_sync(void) +static void migration_bitmap_sync(RAMState *rs) { RAMBlock *block; - MigrationState *s = migrate_get_current(); int64_t end_time; - int64_t bytes_xfer_now; + uint64_t bytes_xfer_now; - bitmap_sync_count++; + rs->bitmap_sync_count++; - if (!bytes_xfer_prev) { - bytes_xfer_prev = ram_bytes_transferred(); + if (!rs->bytes_xfer_prev) { + rs->bytes_xfer_prev = ram_bytes_transferred(); } - if (!start_time) { - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if (!rs->time_last_bitmap_sync) { + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } trace_migration_bitmap_sync_start(); memory_global_dirty_log_sync(); - qemu_mutex_lock(&migration_bitmap_mutex); + qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->offset, block->used_length); + migration_bitmap_sync_range(rs, block, 0, block->used_length); } rcu_read_unlock(); - qemu_mutex_unlock(&migration_bitmap_mutex); + qemu_mutex_unlock(&rs->bitmap_mutex); - trace_migration_bitmap_sync_end(num_dirty_pages_period); + trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); /* more than 1 second = 1000 millisecons */ - if (end_time > start_time + 1000) { + if (end_time > rs->time_last_bitmap_sync + 1000) { if (migrate_auto_converge()) { /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. 
@@ -659,94 +720,87 @@ static void migration_bitmap_sync(void) throttling */ bytes_xfer_now = ram_bytes_transferred(); - if (s->dirty_pages_rate && - (num_dirty_pages_period * TARGET_PAGE_SIZE > - (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ >= 2)) { + if (rs->dirty_pages_rate && + (rs->num_dirty_pages_period * TARGET_PAGE_SIZE > + (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && + (rs->dirty_rate_high_cnt++ >= 2)) { trace_migration_throttle(); - dirty_rate_high_cnt = 0; + rs->dirty_rate_high_cnt = 0; mig_throttle_guest_down(); } - bytes_xfer_prev = bytes_xfer_now; + rs->bytes_xfer_prev = bytes_xfer_now; } if (migrate_use_xbzrle()) { - if (iterations_prev != acct_info.iterations) { - acct_info.xbzrle_cache_miss_rate = - (double)(acct_info.xbzrle_cache_miss - - xbzrle_cache_miss_prev) / - (acct_info.iterations - iterations_prev); + if (rs->iterations_prev != rs->iterations) { + rs->xbzrle_cache_miss_rate = + (double)(rs->xbzrle_cache_miss - + rs->xbzrle_cache_miss_prev) / + (rs->iterations - rs->iterations_prev); } - iterations_prev = acct_info.iterations; - xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; + rs->iterations_prev = rs->iterations; + rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss; } - s->dirty_pages_rate = num_dirty_pages_period * 1000 - / (end_time - start_time); - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; - start_time = end_time; - num_dirty_pages_period = 0; + rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 + / (end_time - rs->time_last_bitmap_sync); + rs->time_last_bitmap_sync = end_time; + rs->num_dirty_pages_period = 0; } - s->dirty_sync_count = bitmap_sync_count; if (migrate_use_events()) { - qapi_event_send_migration_pass(bitmap_sync_count, NULL); + qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL); } } /** - * save_zero_page: Send the zero page to the stream + * save_zero_page: send the zero page to the stream * - * Returns: Number of pages written. + * Returns the number of pages written. * - * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @p: pointer to the page - * @bytes_transferred: increase it with the number of transferred bytes */ -static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - uint8_t *p, uint64_t *bytes_transferred) +static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + uint8_t *p) { int pages = -1; if (is_zero_range(p, TARGET_PAGE_SIZE)) { - acct_info.dup_pages++; - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, 0); - *bytes_transferred += 1; + rs->zero_pages++; + rs->bytes_transferred += + save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(rs->f, 0); + rs->bytes_transferred += 1; pages = 1; } return pages; } -static void ram_release_pages(MigrationState *ms, const char *block_name, - uint64_t offset, int pages) +static void ram_release_pages(const char *rbname, uint64_t offset, int pages) { - if (!migrate_release_ram() || !migration_in_postcopy(ms)) { + if (!migrate_release_ram() || !migration_in_postcopy()) { return; } - ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS); + ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); } /** - * ram_save_page: Send the given page to the stream + * ram_save_page: send the given page to the stream * - * Returns: Number of pages written. 
+ * Returns the number of pages written. * < 0 - error * >=0 - Number of pages written - this might legally be 0 * if xbzrle noticed the page was the same. * - * @ms: The current migration state. - * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, - bool last_stage, uint64_t *bytes_transferred) +static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) { int pages = -1; uint64_t bytes_xmit; @@ -755,16 +809,16 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, int ret; bool send_async = true; RAMBlock *block = pss->block; - ram_addr_t offset = pss->offset; + ram_addr_t offset = pss->page << TARGET_PAGE_BITS; p = block->host + offset; /* In doubt sent page as normal */ bytes_xmit = 0; - ret = ram_control_save_page(f, block->offset, + ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, &bytes_xmit); if (bytes_xmit) { - *bytes_transferred += bytes_xmit; + rs->bytes_transferred += bytes_xmit; pages = 1; } @@ -772,29 +826,26 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, current_addr = block->offset + offset; - if (block == last_sent_block) { - offset |= RAM_SAVE_FLAG_CONTINUE; - } if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { if (bytes_xmit > 0) { - acct_info.norm_pages++; + rs->norm_pages++; } else if (bytes_xmit == 0) { - acct_info.dup_pages++; + rs->zero_pages++; } } } else { - pages = save_zero_page(f, block, offset, p, bytes_transferred); + pages = save_zero_page(rs, block, offset, p); if (pages > 0) { /* Must let xbzrle know, otherwise a previous (now 0'd) cached * page would be stale */ - xbzrle_cache_zero_page(current_addr); - ram_release_pages(ms, block->idstr, pss->offset, pages); - } else if (!ram_bulk_stage && - !migration_in_postcopy(ms) && migrate_use_xbzrle()) { - pages = save_xbzrle_page(f, &p, current_addr, block, - offset, last_stage, bytes_transferred); + xbzrle_cache_zero_page(rs, current_addr); + ram_release_pages(block->idstr, offset, pages); + } else if (!rs->ram_bulk_stage && + !migration_in_postcopy() && migrate_use_xbzrle()) { + pages = save_xbzrle_page(rs, &p, current_addr, block, + offset, last_stage); if (!last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire @@ -806,18 +857,18 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, /* XBZRLE overflow or normal page */ if (pages == -1) { - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_PAGE); + rs->bytes_transferred += save_page_header(rs, block, + offset | RAM_SAVE_FLAG_PAGE); if (send_async) { - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE, + qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, migrate_release_ram() & - migration_in_postcopy(ms)); + migration_in_postcopy()); } else { - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE); } - *bytes_transferred += TARGET_PAGE_SIZE; + rs->bytes_transferred += TARGET_PAGE_SIZE; pages = 1; - acct_info.norm_pages++; + rs->norm_pages++; } XBZRLE_cache_unlock(); @@ -828,10 +879,11 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus 
*pss, static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) { + RAMState *rs = &ram_state; int bytes_sent, blen; uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); - bytes_sent = save_page_header(f, block, offset | + bytes_sent = save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, migrate_compress_level()); @@ -841,16 +893,13 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, error_report("compressed data failed!"); } else { bytes_sent += blen; - ram_release_pages(migrate_get_current(), block->idstr, - offset & TARGET_PAGE_MASK, 1); + ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); } return bytes_sent; } -static uint64_t bytes_transferred; - -static void flush_compressed_data(QEMUFile *f) +static void flush_compressed_data(RAMState *rs) { int idx, len, thread_count; @@ -870,8 +919,8 @@ static void flush_compressed_data(QEMUFile *f) for (idx = 0; idx < thread_count; idx++) { qemu_mutex_lock(&comp_param[idx].mutex); if (!comp_param[idx].quit) { - len = qemu_put_qemu_file(f, comp_param[idx].file); - bytes_transferred += len; + len = qemu_put_qemu_file(rs->f, comp_param[idx].file); + rs->bytes_transferred += len; } qemu_mutex_unlock(&comp_param[idx].mutex); } @@ -884,9 +933,8 @@ static inline void set_compress_params(CompressParam *param, RAMBlock *block, param->offset = offset; } -static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, - ram_addr_t offset, - uint64_t *bytes_transferred) +static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, + ram_addr_t offset) { int idx, thread_count, bytes_xmit = -1, pages = -1; @@ -896,14 +944,14 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, for (idx = 0; idx < thread_count; idx++) { if (comp_param[idx].done) { comp_param[idx].done = false; - bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file); + bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); qemu_mutex_lock(&comp_param[idx].mutex); set_compress_params(&comp_param[idx], block, offset); qemu_cond_signal(&comp_param[idx].cond); qemu_mutex_unlock(&comp_param[idx].mutex); pages = 1; - acct_info.norm_pages++; - *bytes_transferred += bytes_xmit; + rs->norm_pages++; + rs->bytes_transferred += bytes_xmit; break; } } @@ -921,40 +969,37 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, /** * ram_save_compressed_page: compress the given page and send it to the stream * - * Returns: Number of pages written. + * Returns the number of pages written. * - * @ms: The current migration state. 
- * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, bool last_stage, - uint64_t *bytes_transferred) +static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int pages = -1; uint64_t bytes_xmit = 0; uint8_t *p; int ret, blen; RAMBlock *block = pss->block; - ram_addr_t offset = pss->offset; + ram_addr_t offset = pss->page << TARGET_PAGE_BITS; p = block->host + offset; - ret = ram_control_save_page(f, block->offset, + ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, &bytes_xmit); if (bytes_xmit) { - *bytes_transferred += bytes_xmit; + rs->bytes_transferred += bytes_xmit; pages = 1; } if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { if (bytes_xmit > 0) { - acct_info.norm_pages++; + rs->norm_pages++; } else if (bytes_xmit == 0) { - acct_info.dup_pages++; + rs->zero_pages++; } } } else { @@ -964,35 +1009,33 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, * out, keeping this order is important, because the 'cont' flag * is used to avoid resending the block name. */ - if (block != last_sent_block) { - flush_compressed_data(f); - pages = save_zero_page(f, block, offset, p, bytes_transferred); + if (block != rs->last_sent_block) { + flush_compressed_data(rs); + pages = save_zero_page(rs, block, offset, p); if (pages == -1) { /* Make sure the first page is sent out before other pages */ - bytes_xmit = save_page_header(f, block, offset | + bytes_xmit = save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); - blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, + blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE, migrate_compress_level()); if (blen > 0) { - *bytes_transferred += bytes_xmit + blen; - acct_info.norm_pages++; + rs->bytes_transferred += bytes_xmit + blen; + rs->norm_pages++; pages = 1; } else { - qemu_file_set_error(f, blen); + qemu_file_set_error(rs->f, blen); error_report("compressed data failed!"); } } if (pages > 0) { - ram_release_pages(ms, block->idstr, pss->offset, pages); + ram_release_pages(block->idstr, offset, pages); } } else { - offset |= RAM_SAVE_FLAG_CONTINUE; - pages = save_zero_page(f, block, offset, p, bytes_transferred); + pages = save_zero_page(rs, block, offset, p); if (pages == -1) { - pages = compress_page_with_multi_thread(f, block, offset, - bytes_transferred); + pages = compress_page_with_multi_thread(rs, block, offset); } else { - ram_release_pages(ms, block->idstr, pss->offset, pages); + ram_release_pages(block->idstr, offset, pages); } } } @@ -1000,25 +1043,21 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, return pages; } -/* - * Find the next dirty page and update any state associated with - * the search process. +/** + * find_dirty_block: find the next dirty page and update any state + * associated with the search process. * - * Returns: True if a page is found + * Returns if a page is found * - * @f: Current migration stream. - * @pss: Data about the state of the current dirty page scan. 
- * @*again: Set to false if the search has scanned the whole of RAM - * *ram_addr_abs: Pointer into which to store the address of the dirty page - * within the global ram_addr space + * @rs: current RAM state + * @pss: data about the state of the current dirty page scan + * @again: set to false if the search has scanned the whole of RAM */ -static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, - bool *again, ram_addr_t *ram_addr_abs) +static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) { - pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, - ram_addr_abs); - if (pss->complete_round && pss->block == last_seen_block && - pss->offset >= last_offset) { + pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); + if (pss->complete_round && pss->block == rs->last_seen_block && + pss->page >= rs->last_page) { /* * We've been once around the RAM and haven't found anything. * Give up. @@ -1026,22 +1065,21 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, *again = false; return false; } - if (pss->offset >= pss->block->used_length) { + if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { /* Didn't find anything in this RAM Block */ - pss->offset = 0; + pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { /* Hit the end of the list */ pss->block = QLIST_FIRST_RCU(&ram_list.blocks); /* Flag that we've looped */ pss->complete_round = true; - ram_bulk_stage = false; + rs->ram_bulk_stage = false; if (migrate_use_xbzrle()) { /* If xbzrle is on, stop using the data compression at this * point. In theory, xbzrle can do better than compression. */ - flush_compressed_data(f); - compression_switch = false; + flush_compressed_data(rs); } } /* Didn't find anything this time, but try again on the new block */ @@ -1055,61 +1093,59 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, } } -/* +/** + * unqueue_page: gets a page of the queue + * * Helper for 'get_queued_page' - gets a page off the queue - * ms: MigrationState in - * *offset: Used to return the offset within the RAMBlock - * ram_addr_abs: global offset in the dirty/sent bitmaps * - * Returns: block (or NULL if none available) + * Returns the block of the page (or NULL if none available) + * + * @rs: current RAM state + * @offset: used to return the offset within the RAMBlock */ -static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, - ram_addr_t *ram_addr_abs) +static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) { RAMBlock *block = NULL; - qemu_mutex_lock(&ms->src_page_req_mutex); - if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { - struct MigrationSrcPageRequest *entry = - QSIMPLEQ_FIRST(&ms->src_page_requests); + qemu_mutex_lock(&rs->src_page_req_mutex); + if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { + struct RAMSrcPageRequest *entry = + QSIMPLEQ_FIRST(&rs->src_page_requests); block = entry->rb; *offset = entry->offset; - *ram_addr_abs = (entry->offset + entry->rb->offset) & - TARGET_PAGE_MASK; if (entry->len > TARGET_PAGE_SIZE) { entry->len -= TARGET_PAGE_SIZE; entry->offset += TARGET_PAGE_SIZE; } else { memory_region_unref(block->mr); - QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(entry); } } - qemu_mutex_unlock(&ms->src_page_req_mutex); + qemu_mutex_unlock(&rs->src_page_req_mutex); return block; } -/* - * Unqueue a page from the queue fed by postcopy page requests; skips pages - * that are already sent 
(!dirty) +/** + * get_queued_page: unqueue a page from the postocpy requests + * + * Skips pages that are already sent (!dirty) * - * ms: MigrationState in - * pss: PageSearchStatus structure updated with found block/offset - * ram_addr_abs: global offset in the dirty/sent bitmaps + * Returns if a queued page is found * - * Returns: true if a queued page is found + * @rs: current RAM state + * @pss: data about the state of the current dirty page scan */ -static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, - ram_addr_t *ram_addr_abs) +static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) { RAMBlock *block; ram_addr_t offset; bool dirty; do { - block = unqueue_page(ms, &offset, ram_addr_abs); + block = unqueue_page(rs, &offset); /* * We're sending this page, and since it's postcopy nothing else * will dirty it, and we must make sure it doesn't get sent again @@ -1118,18 +1154,18 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, */ if (block) { unsigned long *bitmap; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap); + unsigned long page; + + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + page = (block->offset + offset) >> TARGET_PAGE_BITS; + dirty = test_bit(page, bitmap); if (!dirty) { - trace_get_queued_page_not_dirty( - block->idstr, (uint64_t)offset, - (uint64_t)*ram_addr_abs, - test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, - atomic_rcu_read(&migration_bitmap_rcu)->unsentmap)); + trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, + page, + test_bit(page, + atomic_rcu_read(&rs->ram_bitmap)->unsentmap)); } else { - trace_get_queued_page(block->idstr, - (uint64_t)offset, - (uint64_t)*ram_addr_abs); + trace_get_queued_page(block->idstr, (uint64_t)offset, page); } } @@ -1142,7 +1178,7 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, * in (migration_bitmap_find_and_reset_dirty) that every page is * dirty, that's no longer true. */ - ram_bulk_stage = false; + rs->ram_bulk_stage = false; /* * We want the background search to continue from the queued page @@ -1150,52 +1186,58 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, * it just requested. */ pss->block = block; - pss->offset = offset; + pss->page = offset >> TARGET_PAGE_BITS; } return !!block; } /** - * flush_page_queue: Flush any remaining pages in the ram request queue - * it should be empty at the end anyway, but in error cases there may be - * some left. + * migration_page_queue_free: drop any remaining pages in the ram + * request queue + * + * It should be empty at the end anyway, but in error cases there may + * be some left. in case that there is any page left, we drop it. * - * ms: MigrationState */ -void flush_page_queue(MigrationState *ms) +void migration_page_queue_free(void) { - struct MigrationSrcPageRequest *mspr, *next_mspr; + struct RAMSrcPageRequest *mspr, *next_mspr; + RAMState *rs = &ram_state; /* This queue generally should be empty - but in the case of a failed * migration might have some droppings in. */ rcu_read_lock(); - QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) { + QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { memory_region_unref(mspr->rb->mr); - QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(mspr); } rcu_read_unlock(); } /** - * Queue the pages for transmission, e.g. 
a request from postcopy destination - * ms: MigrationStatus in which the queue is held - * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last) - * start: Offset from the start of the RAMBlock - * len: Length (in bytes) to send - * Return: 0 on success + * ram_save_queue_pages: queue the page for transmission + * + * A request from postcopy destination for example. + * + * Returns zero on success or negative on error + * + * @rbname: Name of the RAMBLock of the request. NULL means the + * same that last one. + * @start: starting address from the start of the RAMBlock + * @len: length (in bytes) to send */ -int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len) +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) { RAMBlock *ramblock; + RAMState *rs = &ram_state; - ms->postcopy_requests++; + rs->postcopy_requests++; rcu_read_lock(); if (!rbname) { /* Reuse last RAMBlock */ - ramblock = ms->last_req_rb; + ramblock = rs->last_req_rb; if (!ramblock) { /* @@ -1213,7 +1255,7 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, error_report("ram_save_queue_pages no block '%s'", rbname); goto err; } - ms->last_req_rb = ramblock; + rs->last_req_rb = ramblock; } trace_ram_save_queue_pages(ramblock->idstr, start, len); if (start+len > ramblock->used_length) { @@ -1223,16 +1265,16 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, goto err; } - struct MigrationSrcPageRequest *new_entry = - g_malloc0(sizeof(struct MigrationSrcPageRequest)); + struct RAMSrcPageRequest *new_entry = + g_malloc0(sizeof(struct RAMSrcPageRequest)); new_entry->rb = ramblock; new_entry->offset = start; new_entry->len = len; memory_region_ref(ramblock->mr); - qemu_mutex_lock(&ms->src_page_req_mutex); - QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req); - qemu_mutex_unlock(&ms->src_page_req_mutex); + qemu_mutex_lock(&rs->src_page_req_mutex); + QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); + qemu_mutex_unlock(&rs->src_page_req_mutex); rcu_read_unlock(); return 0; @@ -1243,51 +1285,43 @@ err: } /** - * ram_save_target_page: Save one target page + * ram_save_target_page: save one target page * + * Returns the number of pages written * - * @f: QEMUFile where to send the data - * @block: pointer to block that contains the page we want to send - * @offset: offset inside the block for the page; + * @rs: current RAM state + * @ms: current migration state + * @pss: data about the page we want to send * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space - * - * Returns: Number of pages written. */ -static int ram_save_target_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, - bool last_stage, - uint64_t *bytes_transferred, - ram_addr_t dirty_ram_abs) +static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int res = 0; /* Check the pages is dirty and if it is send it */ - if (migration_bitmap_clear_dirty(dirty_ram_abs)) { + if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { unsigned long *unsentmap; - if (compression_switch && migrate_use_compression()) { - res = ram_save_compressed_page(ms, f, pss, - last_stage, - bytes_transferred); + /* + * If xbzrle is on, stop using the data compression after first + * round of migration even if compression is enabled. 
In theory, + * xbzrle can do better than compression. + */ + unsigned long page = + (pss->block->offset >> TARGET_PAGE_BITS) + pss->page; + if (migrate_use_compression() + && (rs->ram_bulk_stage || !migrate_use_xbzrle())) { + res = ram_save_compressed_page(rs, pss, last_stage); } else { - res = ram_save_page(ms, f, pss, last_stage, - bytes_transferred); + res = ram_save_page(rs, pss, last_stage); } if (res < 0) { return res; } - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (unsentmap) { - clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap); - } - /* Only update last_sent_block if a block was actually sent; xbzrle - * might have decided the page was identical so didn't bother writing - * to the stream. - */ - if (res > 0) { - last_sent_block = pss->block; + clear_bit(page, unsentmap); } } @@ -1295,83 +1329,70 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, } /** - * ram_save_host_page: Starting at *offset send pages up to the end - * of the current host page. It's valid for the initial - * offset to point into the middle of a host page - * in which case the remainder of the hostpage is sent. - * Only dirty target pages are sent. - * Note that the host page size may be a huge page for this - * block. + * ram_save_host_page: save a whole host page * - * Returns: Number of pages written. + * Starting at *offset send pages up to the end of the current host + * page. It's valid for the initial offset to point into the middle of + * a host page in which case the remainder of the hostpage is sent. + * Only dirty target pages are sent. Note that the host page size may + * be a huge page for this block. * - * @f: QEMUFile where to send the data - * @block: pointer to block that contains the page we want to send - * @offset: offset inside the block for the page; updated to last target page - * sent + * Returns the number of pages written or negative on error + * + * @rs: current RAM state + * @ms: current migration state + * @pss: data about the page we want to send * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space */ -static int ram_save_host_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, - bool last_stage, - uint64_t *bytes_transferred, - ram_addr_t dirty_ram_abs) +static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int tmppages, pages = 0; - size_t pagesize = qemu_ram_pagesize(pss->block); + size_t pagesize_bits = + qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; do { - tmppages = ram_save_target_page(ms, f, pss, last_stage, - bytes_transferred, dirty_ram_abs); + tmppages = ram_save_target_page(rs, pss, last_stage); if (tmppages < 0) { return tmppages; } pages += tmppages; - pss->offset += TARGET_PAGE_SIZE; - dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (pagesize - 1)); + pss->page++; + } while (pss->page & (pagesize_bits - 1)); /* The offset we leave with is the last one we looked at */ - pss->offset -= TARGET_PAGE_SIZE; + pss->page--; return pages; } /** - * ram_find_and_save_block: Finds a dirty page and sends it to f + * ram_find_and_save_block: finds a dirty page and sends it to f * * Called within an RCU critical section. 
* - * Returns: The number of pages written - * 0 means no dirty pages + * Returns the number of pages written where zero means no dirty pages * - * @f: QEMUFile where to send the data + * @rs: current RAM state * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes * * On systems where host-page-size > target-page-size it will send all the * pages in a host page that are dirty. */ -static int ram_find_and_save_block(QEMUFile *f, bool last_stage, - uint64_t *bytes_transferred) +static int ram_find_and_save_block(RAMState *rs, bool last_stage) { PageSearchStatus pss; - MigrationState *ms = migrate_get_current(); int pages = 0; bool again, found; - ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in - ram_addr_t space */ /* No dirty page as there is zero RAM */ if (!ram_bytes_total()) { return pages; } - pss.block = last_seen_block; - pss.offset = last_offset; + pss.block = rs->last_seen_block; + pss.page = rs->last_page; pss.complete_round = false; if (!pss.block) { @@ -1380,22 +1401,20 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, do { again = true; - found = get_queued_page(ms, &pss, &dirty_ram_abs); + found = get_queued_page(rs, &pss); if (!found) { /* priority queue empty, so just search for something dirty */ - found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + found = find_dirty_block(rs, &pss, &again); } if (found) { - pages = ram_save_host_page(ms, f, &pss, - last_stage, bytes_transferred, - dirty_ram_abs); + pages = ram_save_host_page(rs, &pss, last_stage); } } while (!pages && again); - last_seen_block = pss.block; - last_offset = pss.offset; + rs->last_seen_block = pss.block; + rs->last_page = pss.page; return pages; } @@ -1403,30 +1422,17 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, void acct_update_position(QEMUFile *f, size_t size, bool zero) { uint64_t pages = size / TARGET_PAGE_SIZE; + RAMState *rs = &ram_state; + if (zero) { - acct_info.dup_pages += pages; + rs->zero_pages += pages; } else { - acct_info.norm_pages += pages; - bytes_transferred += size; + rs->norm_pages += pages; + rs->bytes_transferred += size; qemu_update_position(f, size); } } -static ram_addr_t ram_save_remaining(void) -{ - return migration_dirty_pages; -} - -uint64_t ram_bytes_remaining(void) -{ - return ram_save_remaining() * TARGET_PAGE_SIZE; -} - -uint64_t ram_bytes_transferred(void) -{ - return bytes_transferred; -} - uint64_t ram_bytes_total(void) { RAMBlock *block; @@ -1445,7 +1451,7 @@ void free_xbzrle_decoded_buf(void) xbzrle_decoded_buf = NULL; } -static void migration_bitmap_free(struct BitmapRcu *bmap) +static void migration_bitmap_free(RAMBitmap *bmap) { g_free(bmap->bmap); g_free(bmap->unsentmap); @@ -1454,11 +1460,13 @@ static void migration_bitmap_free(struct BitmapRcu *bmap) static void ram_migration_cleanup(void *opaque) { + RAMState *rs = opaque; + /* caller have hold iothread lock or is in a bh, so there is * no writing race against this migration_bitmap */ - struct BitmapRcu *bitmap = migration_bitmap_rcu; - atomic_rcu_set(&migration_bitmap_rcu, NULL); + RAMBitmap *bitmap = rs->ram_bitmap; + atomic_rcu_set(&rs->ram_bitmap, NULL); if (bitmap) { memory_global_dirty_log_stop(); call_rcu(bitmap, migration_bitmap_free, rcu); @@ -1477,49 +1485,17 @@ static void ram_migration_cleanup(void *opaque) XBZRLE_cache_unlock(); } -static void reset_ram_globals(void) +static void ram_state_reset(RAMState *rs) { - last_seen_block = NULL; - last_sent_block = NULL; 
- last_offset = 0; - last_version = ram_list.version; - ram_bulk_stage = true; + rs->last_seen_block = NULL; + rs->last_sent_block = NULL; + rs->last_page = 0; + rs->last_version = ram_list.version; + rs->ram_bulk_stage = true; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ -void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) -{ - /* called in qemu main thread, so there is - * no writing race against this migration_bitmap - */ - if (migration_bitmap_rcu) { - struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap; - bitmap = g_new(struct BitmapRcu, 1); - bitmap->bmap = bitmap_new(new); - - /* prevent migration_bitmap content from being set bit - * by migration_bitmap_sync_range() at the same time. - * it is safe to migration if migration_bitmap is cleared bit - * at the same time. - */ - qemu_mutex_lock(&migration_bitmap_mutex); - bitmap_copy(bitmap->bmap, old_bitmap->bmap, old); - bitmap_set(bitmap->bmap, old, new - old); - - /* We don't have a way to safely extend the sentmap - * with RCU; so mark it as missing, entry to postcopy - * will fail. - */ - bitmap->unsentmap = NULL; - - atomic_rcu_set(&migration_bitmap_rcu, bitmap); - qemu_mutex_unlock(&migration_bitmap_mutex); - migration_dirty_pages += new - old; - call_rcu(old_bitmap, migration_bitmap_free, rcu); - } -} - /* * 'expected' is the value you expect the bitmap mostly to be full * of; it won't bother printing lines that are all this value. @@ -1527,14 +1503,14 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) */ void ram_debug_dump_bitmap(unsigned long *todump, bool expected) { - int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; - + unsigned long ram_pages = last_ram_page(); + RAMState *rs = &ram_state; int64_t cur; int64_t linelen = 128; char linebuf[129]; if (!todump) { - todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + todump = atomic_rcu_read(&rs->ram_bitmap)->bmap; } for (cur = 0; cur < ram_pages; cur += linelen) { @@ -1563,8 +1539,9 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected) void ram_postcopy_migrated_memory_release(MigrationState *ms) { + RAMState *rs = &ram_state; struct RAMBlock *block; - unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { unsigned long first = block->offset >> TARGET_PAGE_BITS; @@ -1573,30 +1550,38 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) while (run_start < range) { unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); - ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS, + ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, (run_end - run_start) << TARGET_PAGE_BITS); run_start = find_next_zero_bit(bitmap, range, run_end + 1); } } } -/* +/** + * postcopy_send_discard_bm_ram: discard a RAMBlock + * + * Returns zero on success + * * Callback from postcopy_each_ram_send_discard for each RAMBlock * Note: At this point the 'unsentmap' is the processed bitmap combined * with the dirtymap; so a '1' means it's either dirty or unsent. 
- * start,length: Indexes into the bitmap for the first bit - * representing the named block and length in target-pages + * + * @ms: current migration state + * @pds: state for postcopy + * @start: RAMBlock starting page + * @length: RAMBlock size */ static int postcopy_send_discard_bm_ram(MigrationState *ms, PostcopyDiscardState *pds, unsigned long start, unsigned long length) { + RAMState *rs = &ram_state; unsigned long end = start + length; /* one after the end */ unsigned long current; unsigned long *unsentmap; - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; for (current = start; current < end; ) { unsigned long one = find_next_bit(unsentmap, end, current); @@ -1621,13 +1606,18 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, return 0; } -/* +/** + * postcopy_each_ram_send_discard: discard all RAMBlocks + * + * Returns 0 for success or negative for error + * * Utility for the outgoing postcopy code. * Calls postcopy_send_discard_bm_ram for each RAMBlock * passing it bitmap indexes and name. - * Returns: 0 on success * (qemu_ram_foreach_block ends up passing unscaled lengths * which would mean postcopy code would have to deal with target page) + * + * @ms: current migration state */ static int postcopy_each_ram_send_discard(MigrationState *ms) { @@ -1656,22 +1646,27 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) return 0; } -/* - * Helper for postcopy_chunk_hostpages; it's called twice to cleanup - * the two bitmaps, that are similar, but one is inverted. +/** + * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages + * + * Helper for postcopy_chunk_hostpages; it's called twice to + * canonicalize the two bitmaps, that are similar, but one is + * inverted. * - * We search for runs of target-pages that don't start or end on a - * host page boundary; - * unsent_pass=true: Cleans up partially unsent host pages by searching - * the unsentmap - * unsent_pass=false: Cleans up partially dirty host pages by searching - * the main migration bitmap + * Postcopy requires that all target pages in a hostpage are dirty or + * clean, not a mix. This function canonicalizes the bitmaps. * + * @ms: current migration state + * @unsent_pass: if true we need to canonicalize partially unsent host pages + * otherwise we need to canonicalize partially dirty host pages + * @block: block that contains the page we want to canonicalize + * @pds: state for postcopy */ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, RAMBlock *block, PostcopyDiscardState *pds) { + RAMState *rs = &ram_state; unsigned long *bitmap; unsigned long *unsentmap; unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; @@ -1685,8 +1680,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, return; } - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (unsent_pass) { /* Find a sent page */ @@ -1769,7 +1764,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Remark them as dirty, updating the count for any pages * that weren't previously dirty. 
*/ - migration_dirty_pages += !test_and_set_bit(page, bitmap); + rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); } } @@ -1784,23 +1779,28 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } } -/* +/** + * postcopy_chuck_hostpages: discrad any partially sent host page + * * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially * dirty host-page size chunks as all dirty. In this case the host-page * is the host-page for the particular RAMBlock, i.e. it might be a huge page * - * Returns: 0 on success + * Returns zero on success + * + * @ms: current migration state */ static int postcopy_chunk_hostpages(MigrationState *ms) { + RAMState *rs = &ram_state; struct RAMBlock *block; /* Easiest way to make sure we don't resume in the middle of a host-page */ - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; + rs->last_seen_block = NULL; + rs->last_sent_block = NULL; + rs->last_page = 0; QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { unsigned long first = block->offset >> TARGET_PAGE_BITS; @@ -1822,7 +1822,11 @@ static int postcopy_chunk_hostpages(MigrationState *ms) return 0; } -/* +/** + * ram_postcopy_send_discard_bitmap: transmit the discard bitmap + * + * Returns zero on success + * * Transmit the set of pages to be discarded after precopy to the target * these are pages that: * a) Have been previously transmitted but are now dirty again @@ -1830,18 +1834,21 @@ static int postcopy_chunk_hostpages(MigrationState *ms) * any pages on the destination that have been mapped by background * tasks get discarded (transparent huge pages is the specific concern) * Hopefully this is pretty sparse + * + * @ms: current migration state */ int ram_postcopy_send_discard_bitmap(MigrationState *ms) { + RAMState *rs = &ram_state; int ret; unsigned long *bitmap, *unsentmap; rcu_read_lock(); /* This should be our last sync, the src is now paused */ - migration_bitmap_sync(); + migration_bitmap_sync(rs); - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (!unsentmap) { /* We don't have a safe way to resize the sentmap, so * if the bitmap was resized it will be NULL at this @@ -1862,9 +1869,8 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) /* * Update the unsentmap to be unsentmap = unsentmap | dirty */ - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - bitmap_or(unsentmap, unsentmap, bitmap, - last_ram_offset() >> TARGET_PAGE_BITS); + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + bitmap_or(unsentmap, unsentmap, bitmap, last_ram_page()); trace_ram_postcopy_send_discard_bitmap(); @@ -1878,28 +1884,27 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return ret; } -/* - * At the start of the postcopy phase of migration, any now-dirty - * precopied pages are discarded. +/** + * ram_discard_range: discard dirtied pages at the beginning of postcopy * - * start, length describe a byte address range within the RAMBlock + * Returns zero on success * - * Returns 0 on success. + * @rbname: name of the RAMBlock of the request. NULL means the + * same that last one. 
+ * @start: RAMBlock starting page + * @length: RAMBlock size */ -int ram_discard_range(MigrationIncomingState *mis, - const char *block_name, - uint64_t start, size_t length) +int ram_discard_range(const char *rbname, uint64_t start, size_t length) { int ret = -1; - trace_ram_discard_range(block_name, start, length); + trace_ram_discard_range(rbname, start, length); rcu_read_lock(); - RAMBlock *rb = qemu_ram_block_by_name(block_name); + RAMBlock *rb = qemu_ram_block_by_name(rbname); if (!rb) { - error_report("ram_discard_range: Failed to find block '%s'", - block_name); + error_report("ram_discard_range: Failed to find block '%s'", rbname); goto err; } @@ -1911,14 +1916,14 @@ err: return ret; } -static int ram_save_init_globals(void) +static int ram_state_init(RAMState *rs) { - int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ + unsigned long ram_bitmap_pages; - dirty_rate_high_cnt = 0; - bitmap_sync_count = 0; - migration_bitmap_sync_init(); - qemu_mutex_init(&migration_bitmap_mutex); + memset(rs, 0, sizeof(*rs)); + qemu_mutex_init(&rs->bitmap_mutex); + qemu_mutex_init(&rs->src_page_req_mutex); + QSIMPLEQ_INIT(&rs->src_page_requests); if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); @@ -1947,8 +1952,6 @@ static int ram_save_init_globals(void) XBZRLE.encoded_buf = NULL; return -1; } - - acct_clear(); } /* For memory_global_dirty_log_start below. */ @@ -1956,19 +1959,18 @@ static int ram_save_init_globals(void) qemu_mutex_lock_ramlist(); rcu_read_lock(); - bytes_transferred = 0; - reset_ram_globals(); + ram_state_reset(rs); - migration_bitmap_rcu = g_new0(struct BitmapRcu, 1); + rs->ram_bitmap = g_new0(RAMBitmap, 1); /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { - ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages); + ram_bitmap_pages = last_ram_page(); + rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages); + bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages); if (migrate_postcopy_ram()) { - migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages); + rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages); + bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages); } } @@ -1976,10 +1978,10 @@ static int ram_save_init_globals(void) * Count the total number of pages used by ram blocks not including any * gaps due to alignment or unplugs. */ - migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; + rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; memory_global_dirty_log_start(); - migration_bitmap_sync(); + migration_bitmap_sync(rs); qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); rcu_read_unlock(); @@ -1987,22 +1989,33 @@ static int ram_save_init_globals(void) return 0; } -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has +/* + * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code * start to become numerous it will be necessary to reduce the * granularity of these critical sections. 
*/ +/** + * ram_save_setup: Setup RAM for migration + * + * Returns zero to indicate success and negative for error + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_setup(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; RAMBlock *block; /* migration has already setup the bitmap, reuse it. */ if (!migration_in_colo_state()) { - if (ram_save_init_globals() < 0) { + if (ram_state_init(rs) < 0) { return -1; } } + rs->f = f; rcu_read_lock(); @@ -2027,16 +2040,25 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return 0; } +/** + * ram_save_iterate: iterative stage for migration + * + * Returns zero to indicate success and negative for error + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_iterate(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; int ret; int i; int64_t t0; int done = 0; rcu_read_lock(); - if (ram_list.version != last_version) { - reset_ram_globals(); + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); } /* Read version before ram_list.blocks */ @@ -2049,13 +2071,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) while ((ret = qemu_file_rate_limit(f)) == 0) { int pages; - pages = ram_find_and_save_block(f, false, &bytes_transferred); + pages = ram_find_and_save_block(rs, false); /* no more pages to sent */ if (pages == 0) { done = 1; break; } - acct_info.iterations++; + rs->iterations++; /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. @@ -2071,7 +2093,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - flush_compressed_data(f); + flush_compressed_data(rs); rcu_read_unlock(); /* @@ -2081,7 +2103,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_ROUND); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - bytes_transferred += 8; + rs->bytes_transferred += 8; ret = qemu_file_get_error(f); if (ret < 0) { @@ -2091,13 +2113,24 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return done; } -/* Called with iothread lock */ +/** + * ram_save_complete: function called to send the remaining amount of ram + * + * Returns zero to indicate success + * + * Called with iothread lock + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_complete(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; + rcu_read_lock(); - if (!migration_in_postcopy(migrate_get_current())) { - migration_bitmap_sync(); + if (!migration_in_postcopy()) { + migration_bitmap_sync(rs); } ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -2108,15 +2141,14 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int pages; - pages = ram_find_and_save_block(f, !migration_in_colo_state(), - &bytes_transferred); + pages = ram_find_and_save_block(rs, !migration_in_colo_state()); /* no more blocks to sent */ if (pages == 0) { break; } } - flush_compressed_data(f); + flush_compressed_data(rs); ram_control_after_iterate(f, RAM_CONTROL_FINISH); rcu_read_unlock(); @@ -2130,18 +2162,19 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, uint64_t *non_postcopiable_pending, uint64_t *postcopiable_pending) { + RAMState *rs = opaque; uint64_t remaining_size; - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; - if (!migration_in_postcopy(migrate_get_current()) && + if (!migration_in_postcopy() && remaining_size < 
max_size) { qemu_mutex_lock_iothread(); rcu_read_lock(); - migration_bitmap_sync(); + migration_bitmap_sync(rs); rcu_read_unlock(); qemu_mutex_unlock_iothread(); - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } /* We can do postcopy, and all the data is postcopiable */ @@ -2185,17 +2218,17 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } -/* Must be called from within a rcu critical section. +/** + * ram_block_from_stream: read a RAMBlock id from the migration stream + * + * Must be called from within a rcu critical section. + * * Returns a pointer from within the RCU-protected ram_list. - */ -/* - * Read a RAMBlock ID from the stream f. * - * f: Stream to read from - * flags: Page flags (mostly to see if it's a continuation of previous block) + * @f: QEMUFile where to read the data from + * @flags: Page flags (mostly to see if it's a continuation of previous block) */ -static inline RAMBlock *ram_block_from_stream(QEMUFile *f, - int flags) +static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) { static RAMBlock *block = NULL; char id[256]; @@ -2232,9 +2265,15 @@ static inline void *host_from_ram_block_offset(RAMBlock *block, return block->host + offset; } -/* +/** + * ram_handle_compressed: handle the zero page case + * * If a page (or a whole RDMA chunk) has been * determined to be zero, then zap it. + * + * @host: host address for the zero page + * @ch: what the page is filled from. We only support zero + * @size: size of the zero page */ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) { @@ -2373,20 +2412,33 @@ static void decompress_data_with_multi_threads(QEMUFile *f, qemu_mutex_unlock(&decomp_done_lock); } -/* - * Allocate data structures etc needed by incoming migration with postcopy-ram - * postcopy-ram's similarly names postcopy_ram_incoming_init does the work +/** + * ram_postcopy_incoming_init: allocate postcopy data structures + * + * Returns 0 for success and negative if there was one error + * + * @mis: current migration incoming state + * + * Allocate data structures etc needed by incoming migration with + * postcopy-ram. postcopy-ram's similarly names + * postcopy_ram_incoming_init does the work. */ int ram_postcopy_incoming_init(MigrationIncomingState *mis) { - size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + unsigned long ram_pages = last_ram_page(); return postcopy_ram_incoming_init(mis, ram_pages); } -/* +/** + * ram_load_postcopy: load a page in postcopy case + * + * Returns 0 for success or -errno in case of error + * * Called in postcopy mode by ram_load(). * rcu_read_lock is taken prior to this being called. 
+ * + * @f: QEMUFile where to send the data */ static int ram_load_postcopy(QEMUFile *f) { @@ -2673,5 +2725,5 @@ static SaveVMHandlers savevm_ram_handlers = { void ram_mig_init(void) { qemu_mutex_init(&XBZRLE.lock); - register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); + register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); } diff --git a/migration/savevm.c b/migration/savevm.c index 3b19a4a274..03ae1bdeb4 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -871,7 +871,7 @@ void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; tmp[0] = cpu_to_be64(ram_pagesize_summary()); - tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); + tmp[1] = cpu_to_be64(qemu_target_page_size()); trace_qemu_savevm_send_postcopy_advise(); qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); @@ -1062,7 +1062,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) static bool should_send_vmdesc(void) { MachineState *machine = MACHINE(qdev_get_machine()); - bool in_postcopy = migration_in_postcopy(migrate_get_current()); + bool in_postcopy = migration_in_postcopy(); return !machine->suppress_vmdesc && !in_postcopy; } @@ -1111,7 +1111,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) int vmdesc_len; SaveStateEntry *se; int ret; - bool in_postcopy = migration_in_postcopy(migrate_get_current()); + bool in_postcopy = migration_in_postcopy(); trace_savevm_state_complete_precopy(); @@ -1197,7 +1197,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) * the result is split into the amount for units that can and * for units that can't do postcopy. */ -void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, +void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size, uint64_t *res_non_postcopiable, uint64_t *res_postcopiable) { @@ -1216,7 +1216,7 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, continue; } } - se->ops->save_live_pending(f, se->opaque, max_size, + se->ops->save_live_pending(f, se->opaque, threshold_size, res_non_postcopiable, res_postcopiable); } } @@ -1390,13 +1390,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) } remote_tps = qemu_get_be64(mis->from_src_file); - if (remote_tps != (1ul << qemu_target_page_bits())) { + if (remote_tps != qemu_target_page_size()) { /* * Again, some differences could be dealt with, but for now keep it * simple. 
*/ - error_report("Postcopy needs matching target page sizes (s=%d d=%d)", - (int)remote_tps, 1 << qemu_target_page_bits()); + error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", + (int)remote_tps, qemu_target_page_size()); return -1; } @@ -1479,8 +1479,7 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis, block_length = qemu_get_be64(mis->from_src_file); len -= 16; - int ret = ram_discard_range(mis, ramid, start_addr, - block_length); + int ret = ram_discard_range(ramid, start_addr, block_length); if (ret) { return ret; } diff --git a/migration/trace-events b/migration/trace-events index 7372ce2a51..b8f01a218c 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -63,8 +63,8 @@ put_qtailq_end(const char *name, const char *reason) "%s %s" qemu_file_fclose(void) "" # migration/ram.c -get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 -get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" +get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/%" PRIx64 " page_abs=%lx" +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/%" PRIx64 " page_abs=%lx (sent=%d)" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" diff --git a/qapi-schema.json b/qapi-schema.json index 250e4dc49b..01b087fa16 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -598,6 +598,9 @@ # @postcopy-requests: The number of page requests received from the destination # (since 2.7) # +# @page-size: The number of bytes per page for the various page-based +# statistics (since 2.10) +# # Since: 0.14.0 ## { 'struct': 'MigrationStats', @@ -605,7 +608,7 @@ 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', 'mbps' : 'number', 'dirty-sync-count' : 'int', - 'postcopy-requests' : 'int' } } + 'postcopy-requests' : 'int', 'page-size' : 'int' } } ## # @XBZRLECacheStats: diff --git a/qdev-monitor.c b/qdev-monitor.c index 5f2fcdfc45..e61d596ef7 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -29,6 +29,7 @@ #include "qemu/error-report.h" #include "qemu/help_option.h" #include "sysemu/block-backend.h" +#include "migration/migration.h" /* * Aliases were a bad idea from the start. 
Let's keep them @@ -603,6 +604,11 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) return NULL; } + if (!migration_is_idle()) { + error_setg(errp, "device_add not allowed while migrating"); + return NULL; + } + /* create device */ dev = DEVICE(object_new(driver)); @@ -836,6 +842,45 @@ static DeviceState *find_device_state(const char *id, Error **errp) return DEVICE(obj); } +void qdev_unplug(DeviceState *dev, Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + HotplugHandler *hotplug_ctrl; + HotplugHandlerClass *hdc; + + if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); + return; + } + + if (!dc->hotpluggable) { + error_setg(errp, QERR_DEVICE_NO_HOTPLUG, + object_get_typename(OBJECT(dev))); + return; + } + + if (!migration_is_idle()) { + error_setg(errp, "device_del not allowed while migrating"); + return; + } + + qdev_hot_removed = true; + + hotplug_ctrl = qdev_get_hotplug_handler(dev); + /* hotpluggable device MUST have HotplugHandler, if it doesn't + * then something is very wrong with it */ + g_assert(hotplug_ctrl); + + /* If device supports async unplug just request it to be done, + * otherwise just remove it synchronously */ + hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); + if (hdc->unplug_request) { + hotplug_handler_unplug_request(hotplug_ctrl, dev, errp); + } else { + hotplug_handler_unplug(hotplug_ctrl, dev, errp); + } +} + void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); |