diff options
-rw-r--r-- | hmp-commands.hx | 12 | ||||
-rw-r--r-- | hmp.c | 13 | ||||
-rw-r--r-- | hmp.h | 1 | ||||
-rw-r--r-- | include/exec/ram_addr.h | 10 | ||||
-rw-r--r-- | migration/migration.c | 129 | ||||
-rw-r--r-- | migration/migration.h | 11 | ||||
-rw-r--r-- | migration/page_cache.c | 25 | ||||
-rw-r--r-- | migration/page_cache.h | 7 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 54 | ||||
-rw-r--r-- | migration/postcopy-ram.h | 4 | ||||
-rw-r--r-- | migration/ram.c | 259 | ||||
-rw-r--r-- | migration/ram.h | 7 | ||||
-rw-r--r-- | migration/tls.c | 1 | ||||
-rw-r--r-- | qapi/migration.json | 36 |
14 files changed, 431 insertions, 138 deletions
diff --git a/hmp-commands.hx b/hmp-commands.hx index 1941e19932..4afd57cf5f 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -959,7 +959,19 @@ STEXI @item migrate_cancel @findex migrate_cancel Cancel the current VM migration. +ETEXI + { + .name = "migrate_continue", + .args_type = "state:s", + .params = "state", + .help = "Continue migration from the given paused state", + .cmd = hmp_migrate_continue, + }, +STEXI +@item migrate_continue @var{state} +@findex migrate_continue +Continue migration from the paused state @var{state} ETEXI { @@ -1495,6 +1495,19 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict) qmp_migrate_cancel(NULL); } +void hmp_migrate_continue(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *state = qdict_get_str(qdict, "state"); + int val = qapi_enum_parse(&MigrationStatus_lookup, state, -1, &err); + + if (val >= 0) { + qmp_migrate_continue(val, &err); + } + + hmp_handle_error(mon, &err); +} + void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) { Error *err = NULL; @@ -68,6 +68,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict); void hmp_delvm(Monitor *mon, const QDict *qdict); void hmp_info_snapshots(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); +void hmp_migrate_continue(Monitor *mon, const QDict *qdict); void hmp_migrate_incoming(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index d017639f7e..6cbc02aa0f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -47,6 +47,8 @@ struct RAMBlock { * of the postcopy phase */ unsigned long *unsentmap; + /* bitmap of already received pages in postcopy */ + unsigned long *receivedmap; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) return (char *)block->host + offset; } +static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, + RAMBlock *rb) +{ + uint64_t host_addr_offset = + (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); + return host_addr_offset >> TARGET_PAGE_BITS; +} + long qemu_getrampagesize(void); unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, diff --git a/migration/migration.c b/migration/migration.c index 98429dc843..62761d5705 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -104,6 +104,9 @@ enum mig_rp_message_type { static MigrationState *current_migration; static bool migration_object_check(MigrationState *ms, Error **errp); +static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state); void migration_object_init(void) { @@ -526,6 +529,8 @@ static bool migration_is_setup_or_active(int state) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_SETUP: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: return true; default: @@ -600,6 +605,8 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_CANCELLING: case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: /* TODO add some postcopy stats */ info->has_status = true; info->has_total_time = true; @@ -865,6 +872,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_block_incremental) { dest->block_incremental = params->block_incremental; } + if (params->has_x_multifd_channels) { + dest->x_multifd_channels = params->x_multifd_channels; + } + if (params->has_x_multifd_page_count) { + dest->x_multifd_page_count = params->x_multifd_page_count; + } } static void migrate_params_apply(MigrateSetParameters *params) @@ -1071,19 +1084,30 @@ static void migrate_fd_cleanup(void *opaque) MIGRATION_STATUS_CANCELLED); } + if (s->error) { + /* It is used on info migrate. We can't free it */ + error_report_err(error_copy(s->error)); + } notifier_list_notify(&migration_state_notifiers, s); block_cleanup_parameters(s); } +void migrate_set_error(MigrationState *s, const Error *error) +{ + qemu_mutex_lock(&s->error_mutex); + if (!s->error) { + s->error = error_copy(error); + } + qemu_mutex_unlock(&s->error_mutex); +} + void migrate_fd_error(MigrationState *s, const Error *error) { trace_migrate_fd_error(error_get_pretty(error)); assert(s->to_dst_file == NULL); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); - if (!s->error) { - s->error = error_copy(error); - } + migrate_set_error(s, error); notifier_list_notify(&migration_state_notifiers, s); block_cleanup_parameters(s); } @@ -1104,6 +1128,10 @@ static void migrate_fd_cancel(MigrationState *s) if (!migration_is_setup_or_active(old_state)) { break; } + /* If the migration is paused, kick it out of the pause */ + if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { + qemu_sem_post(&s->pause_sem); + } migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); } while (s->state != MIGRATION_STATUS_CANCELLING); @@ -1183,6 +1211,8 @@ bool migration_is_idle(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_COLO: + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: return false; case MIGRATION_STATUS__MAX: g_assert_not_reached(); @@ -1362,29 +1392,24 @@ void qmp_migrate_cancel(Error **errp) migrate_fd_cancel(migrate_get_current()); } -void qmp_migrate_set_cache_size(int64_t value, Error **errp) +void qmp_migrate_continue(MigrationStatus state, Error **errp) { MigrationState *s = migrate_get_current(); - int64_t new_size; - - /* Check for truncation */ - if (value != (size_t)value) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "exceeding address space"); + if (s->state != state) { + error_setg(errp, "Migration not in expected state: %s", + MigrationStatus_str(s->state)); return; } + qemu_sem_post(&s->pause_sem); +} - /* Cache should not be larger than guest ram size */ - if (value > ram_bytes_total()) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "exceeds guest ram size "); - return; - } +void qmp_migrate_set_cache_size(int64_t value, Error **errp) +{ + MigrationState *s = migrate_get_current(); + int64_t new_size; - new_size = xbzrle_cache_resize(value); + new_size = xbzrle_cache_resize(value, errp); if (new_size < 0) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "is smaller than page size"); return; } @@ -1521,6 +1546,16 @@ bool migrate_use_multifd(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; } +bool migrate_pause_before_switchover(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[ + MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; +} + int migrate_multifd_channels(void) { MigrationState *s; @@ -1799,8 +1834,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) QEMUFile *fb; int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); bool restart_block = false; - migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_POSTCOPY_ACTIVE); + int cur_state = MIGRATION_STATUS_ACTIVE; + if (!migrate_pause_before_switchover()) { + migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + } trace_postcopy_start(); qemu_mutex_lock_iothread(); @@ -1814,6 +1852,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) goto fail; } + ret = migration_maybe_pause(ms, &cur_state, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + if (ret < 0) { + goto fail; + } + ret = bdrv_inactivate_all(); if (ret < 0) { goto fail; @@ -1952,6 +1996,41 @@ fail: } /** + * migration_maybe_pause: Pause if required to by + * migrate_pause_before_switchover called with the iothread locked + * Returns: 0 on success + */ +static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state) +{ + if (!migrate_pause_before_switchover()) { + return 0; + } + + /* Since leaving this state is not atomic with posting the semaphore + * it's possible that someone could have issued multiple migrate_continue + * and the semaphore is incorrectly positive at this point; + * the docs say it's undefined to reinit a semaphore that's already + * init'd, so use timedwait to eat up any existing posts. + */ + while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { + /* This block intentionally left blank */ + } + + qemu_mutex_unlock_iothread(); + migrate_set_state(&s->state, *current_active_state, + MIGRATION_STATUS_PRE_SWITCHOVER); + qemu_sem_wait(&s->pause_sem); + migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, + new_state); + *current_active_state = new_state; + qemu_mutex_lock_iothread(); + + return s->state == new_state ? 0 : -EINVAL; +} + +/** * migration_completion: Used by migration_thread when there's not much left. * The caller 'breaks' the loop when this returns. * @@ -1977,6 +2056,10 @@ static void migration_completion(MigrationState *s, int current_active_state, bool inactivate = !migrate_colo_enabled(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); if (ret >= 0) { + ret = migration_maybe_pause(s, ¤t_active_state, + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, inactivate); @@ -2355,8 +2438,10 @@ static void migration_instance_finalize(Object *obj) MigrationState *ms = MIGRATION_OBJ(obj); MigrationParameters *params = &ms->parameters; + qemu_mutex_destroy(&ms->error_mutex); g_free(params->tls_hostname); g_free(params->tls_creds); + qemu_sem_destroy(&ms->pause_sem); } static void migration_instance_init(Object *obj) @@ -2367,6 +2452,8 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; + qemu_sem_init(&ms->pause_sem, 0); + qemu_mutex_init(&ms->error_mutex); params->tls_hostname = g_strdup(""); params->tls_creds = g_strdup(""); diff --git a/migration/migration.h b/migration/migration.h index b83cceadc4..8ccdd7a577 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -121,6 +121,9 @@ struct MigrationState /* Flag set once the migration thread called bdrv_inactivate_all */ bool block_inactive; + /* Migration is paused due to pause-before-switchover */ + QemuSemaphore pause_sem; + /* The semaphore is used to notify COLO thread that failover is finished */ QemuSemaphore colo_exit_sem; @@ -129,8 +132,12 @@ struct MigrationState int64_t colo_checkpoint_time; QEMUTimer *colo_delay_timer; - /* The last error that occurred */ + /* The first error that has occurred. + We used the mutex to be able to return the 1st error message */ Error *error; + /* mutex to protect errp */ + QemuMutex error_mutex; + /* Do we have to clean up -b/-i from old migrate parameters */ /* This feature is deprecated and will be removed */ bool must_remove_block_options; @@ -159,6 +166,7 @@ bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); +void migrate_set_error(MigrationState *s, const Error *error); void migrate_fd_error(MigrationState *s, const Error *error); void migrate_fd_connect(MigrationState *s); @@ -177,6 +185,7 @@ bool migrate_zero_blocks(void); bool migrate_auto_converge(void); bool migrate_use_multifd(void); +bool migrate_pause_before_switchover(void); int migrate_multifd_channels(void); int migrate_multifd_page_count(void); diff --git a/migration/page_cache.c b/migration/page_cache.c index ba984c4858..9a9d13d6a2 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -14,6 +14,8 @@ #include "qemu/osdep.h" +#include "qapi/qmp/qerror.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/host-utils.h" #include "migration/page_cache.h" @@ -39,27 +41,28 @@ struct CacheItem { struct PageCache { CacheItem *page_cache; - unsigned int page_size; - int64_t max_num_items; - uint64_t max_item_age; - int64_t num_items; + size_t page_size; + size_t max_num_items; + size_t num_items; }; -PageCache *cache_init(int64_t num_pages, unsigned int page_size) +PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) { int64_t i; - + size_t num_pages = new_size / page_size; PageCache *cache; - if (num_pages <= 0) { - DPRINTF("invalid number of pages\n"); + if (new_size < page_size) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "is smaller than one target page size"); return NULL; } /* We prefer not to abort if there is no memory */ cache = g_try_malloc(sizeof(*cache)); if (!cache) { - DPRINTF("Failed to allocate cache\n"); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "Failed to allocate cache"); return NULL; } /* round down to the nearest power of 2 */ @@ -69,7 +72,6 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size) } cache->page_size = page_size; cache->num_items = 0; - cache->max_item_age = 0; cache->max_num_items = num_pages; DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items); @@ -78,7 +80,8 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size) cache->page_cache = g_try_malloc((cache->max_num_items) * sizeof(*cache->page_cache)); if (!cache->page_cache) { - DPRINTF("Failed to allocate cache->page_cache\n"); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "Failed to allocate page cache"); g_free(cache); return NULL; } diff --git a/migration/page_cache.h b/migration/page_cache.h index 4fadd0c501..0cb94498a0 100644 --- a/migration/page_cache.h +++ b/migration/page_cache.h @@ -24,12 +24,11 @@ typedef struct PageCache PageCache; * * Returns new allocated cache or NULL on error * - * @cache pointer to the PageCache struct - * @num_pages: cache maximal number of cached pages + * @cache_size: cache size in bytes * @page_size: cache page size + * @errp: set *errp if the check failed, with reason */ -PageCache *cache_init(int64_t num_pages, unsigned int page_size); - +PageCache *cache_init(int64_t cache_size, size_t page_size, Error **errp); /** * cache_fini: free all cache resources * @cache pointer to the PageCache struct diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 0de68e8b25..bec6c2c66b 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -641,26 +641,46 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return 0; } +static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, + void *from_addr, uint64_t pagesize, RAMBlock *rb) +{ + int ret; + if (from_addr) { + struct uffdio_copy copy_struct; + copy_struct.dst = (uint64_t)(uintptr_t)host_addr; + copy_struct.src = (uint64_t)(uintptr_t)from_addr; + copy_struct.len = pagesize; + copy_struct.mode = 0; + ret = ioctl(userfault_fd, UFFDIO_COPY, ©_struct); + } else { + struct uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host_addr; + zero_struct.range.len = pagesize; + zero_struct.mode = 0; + ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); + } + if (!ret) { + ramblock_recv_bitmap_set_range(rb, host_addr, + pagesize / qemu_target_page_size()); + } + return ret; +} + /* * Place a host page (from) at (host) atomically * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize) + RAMBlock *rb) { - struct uffdio_copy copy_struct; - - copy_struct.dst = (uint64_t)(uintptr_t)host; - copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = pagesize; - copy_struct.mode = 0; + size_t pagesize = qemu_ram_pagesize(rb); /* copy also acks to the kernel waking the stalled thread up * TODO: We can inhibit that ack and only do it if it was requested * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. */ - if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -677,17 +697,13 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { trace_postcopy_place_page_zero(host); - if (pagesize == getpagesize()) { - struct uffdio_zeropage zero_struct; - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; - - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + if (qemu_ram_pagesize(rb) == getpagesize()) { + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), + rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); @@ -711,7 +727,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); } return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, - pagesize); + rb); } return 0; @@ -774,14 +790,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) } int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize) + RAMBlock *rb) { assert(0); return -1; } int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize) + RAMBlock *rb) { assert(0); return -1; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 587a8b86a7..77ea0fd264 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms, * returns 0 on success */ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, - size_t pagesize); + RAMBlock *rb); /* * Place a zero page at (host) atomically * returns 0 on success */ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, - size_t pagesize); + RAMBlock *rb); /* The current postcopy state is read/set by postcopy_state_get/set * which update it atomically. diff --git a/migration/ram.c b/migration/ram.c index b83f8977c5..7f6327f708 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -42,8 +42,10 @@ #include "postcopy-ram.h" #include "migration/page_cache.h" #include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" #include "trace.h" #include "exec/ram_addr.h" +#include "exec/target_page.h" #include "qemu/rcu_queue.h" #include "migration/colo.h" #include "migration/block.h" @@ -113,13 +115,24 @@ static void XBZRLE_cache_unlock(void) * Returns the new_size or negative in case of error. * * @new_size: new cache size + * @errp: set *errp if the check failed, with reason */ -int64_t xbzrle_cache_resize(int64_t new_size) +int64_t xbzrle_cache_resize(int64_t new_size, Error **errp) { PageCache *new_cache; int64_t ret; - if (new_size < TARGET_PAGE_SIZE) { + /* Check for truncation */ + if (new_size != (size_t)new_size) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "exceeding address space"); + return -1; + } + + /* Cache should not be larger than guest ram size */ + if (new_size > ram_bytes_total()) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", + "exceeds guest ram size"); return -1; } @@ -129,10 +142,8 @@ int64_t xbzrle_cache_resize(int64_t new_size) if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { goto out_new_size; } - new_cache = cache_init(new_size / TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); + new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); if (!new_cache) { - error_report("Error creating cache"); ret = -1; goto out; } @@ -148,6 +159,35 @@ out: return ret; } +static void ramblock_recv_map_init(void) +{ + RAMBlock *rb; + + RAMBLOCK_FOREACH(rb) { + assert(!rb->receivedmap); + rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); + } +} + +int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) +{ + return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), + rb->receivedmap); +} + +void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) +{ + set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); +} + +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + size_t nr) +{ + bitmap_set_atomic(rb->receivedmap, + ramblock_recv_bitmap_offset(host_addr, rb), + nr); +} + /* * An outstanding page request, on the source, having been received * and queued @@ -1566,6 +1606,31 @@ static void xbzrle_load_cleanup(void) XBZRLE.decoded_buf = NULL; } +static void ram_state_cleanup(RAMState **rsp) +{ + migration_page_queue_free(*rsp); + qemu_mutex_destroy(&(*rsp)->bitmap_mutex); + qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); + g_free(*rsp); + *rsp = NULL; +} + +static void xbzrle_cleanup(void) +{ + XBZRLE_cache_lock(); + if (XBZRLE.cache) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + g_free(XBZRLE.zero_target_page); + XBZRLE.cache = NULL; + XBZRLE.encoded_buf = NULL; + XBZRLE.current_buf = NULL; + XBZRLE.zero_target_page = NULL; + } + XBZRLE_cache_unlock(); +} + static void ram_save_cleanup(void *opaque) { RAMState **rsp = opaque; @@ -1583,22 +1648,9 @@ static void ram_save_cleanup(void *opaque) block->unsentmap = NULL; } - XBZRLE_cache_lock(); - if (XBZRLE.cache) { - cache_fini(XBZRLE.cache); - g_free(XBZRLE.encoded_buf); - g_free(XBZRLE.current_buf); - g_free(XBZRLE.zero_target_page); - XBZRLE.cache = NULL; - XBZRLE.encoded_buf = NULL; - XBZRLE.current_buf = NULL; - XBZRLE.zero_target_page = NULL; - } - XBZRLE_cache_unlock(); - migration_page_queue_free(*rsp); + xbzrle_cleanup(); compress_threads_save_cleanup(); - g_free(*rsp); - *rsp = NULL; + ram_state_cleanup(rsp); } static void ram_state_reset(RAMState *rs) @@ -1999,6 +2051,8 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) goto err; } + bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), + length >> qemu_target_page_bits()); ret = ram_block_discard_range(rb, start, length); err: @@ -2007,63 +2061,96 @@ err: return ret; } +/* + * For every allocation, we will try not to crash the VM if the + * allocation failed. + */ +static int xbzrle_init(void) +{ + Error *local_err = NULL; + + if (!migrate_use_xbzrle()) { + return 0; + } + + XBZRLE_cache_lock(); + + XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.zero_target_page) { + error_report("%s: Error allocating zero page", __func__); + goto err_out; + } + + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), + TARGET_PAGE_SIZE, &local_err); + if (!XBZRLE.cache) { + error_report_err(local_err); + goto free_zero_page; + } + + XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.encoded_buf) { + error_report("%s: Error allocating encoded_buf", __func__); + goto free_cache; + } + + XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); + if (!XBZRLE.current_buf) { + error_report("%s: Error allocating current_buf", __func__); + goto free_encoded_buf; + } + + /* We are all good */ + XBZRLE_cache_unlock(); + return 0; + +free_encoded_buf: + g_free(XBZRLE.encoded_buf); + XBZRLE.encoded_buf = NULL; +free_cache: + cache_fini(XBZRLE.cache); + XBZRLE.cache = NULL; +free_zero_page: + g_free(XBZRLE.zero_target_page); + XBZRLE.zero_target_page = NULL; +err_out: + XBZRLE_cache_unlock(); + return -ENOMEM; +} + static int ram_state_init(RAMState **rsp) { - *rsp = g_new0(RAMState, 1); + *rsp = g_try_new0(RAMState, 1); + + if (!*rsp) { + error_report("%s: Init ramstate fail", __func__); + return -1; + } qemu_mutex_init(&(*rsp)->bitmap_mutex); qemu_mutex_init(&(*rsp)->src_page_req_mutex); QSIMPLEQ_INIT(&(*rsp)->src_page_requests); - if (migrate_use_xbzrle()) { - XBZRLE_cache_lock(); - XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE); - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / - TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!XBZRLE.cache) { - XBZRLE_cache_unlock(); - error_report("Error creating cache"); - g_free(*rsp); - *rsp = NULL; - return -1; - } - XBZRLE_cache_unlock(); - - /* We prefer not to abort if there is no memory */ - XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); - if (!XBZRLE.encoded_buf) { - error_report("Error allocating encoded_buf"); - g_free(*rsp); - *rsp = NULL; - return -1; - } + /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. + */ + (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; - XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); - if (!XBZRLE.current_buf) { - error_report("Error allocating current_buf"); - g_free(XBZRLE.encoded_buf); - XBZRLE.encoded_buf = NULL; - g_free(*rsp); - *rsp = NULL; - return -1; - } - } + ram_state_reset(*rsp); - /* For memory_global_dirty_log_start below. */ - qemu_mutex_lock_iothread(); + return 0; +} - qemu_mutex_lock_ramlist(); - rcu_read_lock(); - ram_state_reset(*rsp); +static void ram_list_init_bitmaps(void) +{ + RAMBlock *block; + unsigned long pages; /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { - RAMBlock *block; - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - unsigned long pages = block->max_length >> TARGET_PAGE_BITS; - + pages = block->max_length >> TARGET_PAGE_BITS; block->bmap = bitmap_new(pages); bitmap_set(block->bmap, 0, pages); if (migrate_postcopy_ram()) { @@ -2072,18 +2159,36 @@ static int ram_state_init(RAMState **rsp) } } } +} - /* - * Count the total number of pages used by ram blocks not including any - * gaps due to alignment or unplugs. - */ - (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; +static void ram_init_bitmaps(RAMState *rs) +{ + /* For memory_global_dirty_log_start below. */ + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + rcu_read_lock(); + ram_list_init_bitmaps(); memory_global_dirty_log_start(); - migration_bitmap_sync(*rsp); + migration_bitmap_sync(rs); + + rcu_read_unlock(); qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); - rcu_read_unlock(); +} + +static int ram_init_all(RAMState **rsp) +{ + if (ram_state_init(rsp)) { + return -1; + } + + if (xbzrle_init()) { + ram_state_cleanup(rsp); + return -1; + } + + ram_init_bitmaps(*rsp); return 0; } @@ -2110,7 +2215,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) /* migration has already setup the bitmap, reuse it. */ if (!migration_in_colo_state()) { - if (ram_state_init(rsp) != 0) { + if (ram_init_all(rsp) != 0) { return -1; } } @@ -2534,13 +2639,20 @@ static int ram_load_setup(QEMUFile *f, void *opaque) { xbzrle_load_setup(); compress_threads_load_setup(); + ramblock_recv_map_init(); return 0; } static int ram_load_cleanup(void *opaque) { + RAMBlock *rb; xbzrle_load_cleanup(); compress_threads_load_cleanup(); + + RAMBLOCK_FOREACH(rb) { + g_free(rb->receivedmap); + rb->receivedmap = NULL; + } return 0; } @@ -2680,10 +2792,10 @@ static int ram_load_postcopy(QEMUFile *f) if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, - block->page_size); + block); } else { ret = postcopy_place_page(mis, place_dest, - place_source, block->page_size); + place_source, block); } } if (!ret) { @@ -2755,6 +2867,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } + ramblock_recv_bitmap_set(block, host); trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); } diff --git a/migration/ram.h b/migration/ram.h index 4a72d66503..f9f7eef894 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -35,7 +35,7 @@ extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; -int64_t xbzrle_cache_resize(int64_t new_size); +int64_t xbzrle_cache_resize(int64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); @@ -57,4 +57,9 @@ int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); + +int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); +void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); + #endif diff --git a/migration/tls.c b/migration/tls.c index 596e8790bd..026a008667 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -119,7 +119,6 @@ static void migration_tls_outgoing_handshake(QIOTask *task, if (qio_task_propagate_error(task, &err)) { trace_migration_tls_outgoing_handshake_error(error_get_pretty(err)); migrate_fd_error(s, err); - error_free(err); } else { trace_migration_tls_outgoing_handshake_complete(); migration_channel_connect(s, ioc, NULL); diff --git a/qapi/migration.json b/qapi/migration.json index f8b365e3f5..6ae866e1aa 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -96,12 +96,18 @@ # @colo: VM is in the process of fault tolerance, VM can not get into this # state unless colo capability is enabled for migration. (since 2.8) # +# @pre-switchover: Paused before device serialisation. (since 2.11) +# +# @device: During device serialisation when pause-before-switchover is enabled +# (since 2.11) +# # Since: 2.3 # ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] } + 'active', 'postcopy-active', 'completed', 'failed', 'colo', + 'pre-switchover', 'device' ] } ## # @MigrationInfo: @@ -341,6 +347,9 @@ # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # +# @pause-before-switchover: Pause outgoing migration before serialising device +# state and before disabling block IO (since 2.11) +# # @x-multifd: Use more than one fd for migration (since 2.11) # # Since: 1.2 @@ -348,7 +357,7 @@ { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path', 'x-multifd' ] } + 'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] } ## # @MigrationCapabilityStatus: @@ -471,7 +480,7 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 @@ -542,7 +551,7 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 @@ -638,7 +647,7 @@ # number of sockets used for migration. # The default value is 2 (since 2.11) # -# @x-multifd-page-count: Number of pages sent together to a thread +# @x-multifd-page-count: Number of pages sent together to a thread. # The default value is 16 (since 2.11) # # Since: 2.4 @@ -868,6 +877,23 @@ { 'command': 'migrate_cancel' } ## +# @migrate-continue: +# +# Continue migration when it's in a paused state. +# +# @state: The state the migration is currently expected to be in +# +# Returns: nothing on success +# Since: 2.11 +# Example: +# +# -> { "execute": "migrate-continue" , "arguments": +# { "state": "pre-switchover" } } +# <- { "return": {} } +## +{ 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} } + +## # @migrate_set_downtime: # # Set maximum tolerated downtime for migration. |