Diffstat (limited to 'migration')
 migration/migration.c    |  38
 migration/migration.h    |   1
 migration/multifd.c      |   2
 migration/postcopy-ram.c |  15
 migration/ram.c          | 246
5 files changed, 175 insertions, 127 deletions
diff --git a/migration/migration.c b/migration/migration.c
index 8ca034136b..4698b47442 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -223,13 +223,18 @@ void migration_object_init(void)
     dirty_bitmap_mig_init();
 }

+void migration_cancel(void)
+{
+    migrate_fd_cancel(current_migration);
+}
+
 void migration_shutdown(void)
 {
     /*
      * Cancel the current migration - that will (eventually)
      * stop the migration using this structure
      */
-    migrate_fd_cancel(current_migration);
+    migration_cancel();
     object_unref(OBJECT(current_migration));

     /*
@@ -1073,27 +1078,24 @@ static void populate_vfio_info(MigrationInfo *info)
 static void fill_source_migration_info(MigrationInfo *info)
 {
     MigrationState *s = migrate_get_current();
+    GSList *cur_blocker = migration_blockers;

-    info->blocked = migration_is_blocked(NULL);
-    info->has_blocked_reasons = info->blocked;
     info->blocked_reasons = NULL;
-    if (info->blocked) {
-        GSList *cur_blocker = migration_blockers;

-        /*
-         * There are two types of reasons a migration might be blocked;
-         * a) devices marked in VMState as non-migratable, and
-         * b) Explicit migration blockers
-         * We need to add both of them here.
-         */
-        qemu_savevm_non_migratable_list(&info->blocked_reasons);
+    /*
+     * There are two types of reasons a migration might be blocked;
+     * a) devices marked in VMState as non-migratable, and
+     * b) Explicit migration blockers
+     * We need to add both of them here.
+     */
+    qemu_savevm_non_migratable_list(&info->blocked_reasons);

-        while (cur_blocker) {
-            QAPI_LIST_PREPEND(info->blocked_reasons,
-                              g_strdup(error_get_pretty(cur_blocker->data)));
-            cur_blocker = g_slist_next(cur_blocker);
-        }
+    while (cur_blocker) {
+        QAPI_LIST_PREPEND(info->blocked_reasons,
+                          g_strdup(error_get_pretty(cur_blocker->data)));
+        cur_blocker = g_slist_next(cur_blocker);
     }
+    info->has_blocked_reasons = info->blocked_reasons != NULL;

     switch (s->state) {
     case MIGRATION_STATUS_NONE:
@@ -2310,7 +2312,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,

 void qmp_migrate_cancel(Error **errp)
 {
-    migrate_fd_cancel(migrate_get_current());
+    migration_cancel();
 }

 void qmp_migrate_continue(MigrationStatus state, Error **errp)
diff --git a/migration/migration.h b/migration/migration.h
index db6708326b..f7b388d718 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -375,5 +375,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
 void migration_make_urgent_request(void);
 void migration_consume_urgent_request(void);
 bool migration_rate_limit(void);
+void migration_cancel(void);

 #endif
diff --git a/migration/multifd.c b/migration/multifd.c
index a6677c45c8..0a4803cfcc 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -361,7 +361,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
         if (offset > (block->used_length - qemu_target_page_size())) {
             error_setg(errp, "multifd: offset too long %" PRIu64
                        " (max " RAM_ADDR_FMT ")",
-                       offset, block->max_length);
+                       offset, block->used_length);
             return -1;
         }
         p->pages->iov[i].iov_base = block->host + offset;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index ab482adef1..2e9697bdd2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -17,6 +17,7 @@
  */

 #include "qemu/osdep.h"
+#include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "migration.h"
 #include "qemu-file.h"
@@ -30,6 +31,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/boards.h"
+#include "exec/ramblock.h"

 /* Arbitrary limit on size of each discard command,
  * keeps them around ~200 bytes
@@ -453,6 +455,13 @@ static int init_range(RAMBlock *rb, void *opaque)
     trace_postcopy_init_range(block_name, host_addr, offset, length);

     /*
+     * Save the used_length before running the guest. In case we have to
+     * resize RAM blocks when syncing RAM block sizes from the source during
+     * precopy, we'll update it manually via the ram block notifier.
+     */
+    rb->postcopy_length = length;
+
+    /*
      * We need the whole of RAM to be truly empty for postcopy, so things
      * like ROMs and any data tables built during init must be zero'd
      * - we're going to get the copy from the source anyway.
@@ -474,7 +483,7 @@ static int cleanup_range(RAMBlock *rb, void *opaque)
     const char *block_name = qemu_ram_get_idstr(rb);
     void *host_addr = qemu_ram_get_host_addr(rb);
     ram_addr_t offset = qemu_ram_get_offset(rb);
-    ram_addr_t length = qemu_ram_get_used_length(rb);
+    ram_addr_t length = rb->postcopy_length;
     MigrationIncomingState *mis = opaque;
     struct uffdio_range range_struct;
     trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
@@ -580,7 +589,7 @@ static int nhp_range(RAMBlock *rb, void *opaque)
     const char *block_name = qemu_ram_get_idstr(rb);
     void *host_addr = qemu_ram_get_host_addr(rb);
     ram_addr_t offset = qemu_ram_get_offset(rb);
-    ram_addr_t length = qemu_ram_get_used_length(rb);
+    ram_addr_t length = rb->postcopy_length;
     trace_postcopy_nhp_range(block_name, host_addr, offset, length);

     /*
@@ -624,7 +633,7 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
     struct uffdio_register reg_struct;

     reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
-    reg_struct.range.len = qemu_ram_get_used_length(rb);
+    reg_struct.range.len = rb->postcopy_length;
     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

     /* Now tell our userfault_fd that it's responsible for this area */
diff --git a/migration/ram.c b/migration/ram.c
index ace8ad431c..60ea913c54 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -240,7 +240,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
         return -1;
     }

-    nbits = block->used_length >> TARGET_PAGE_BITS;
+    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

     /*
      * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
@@ -311,10 +311,6 @@ struct RAMState {
     ram_addr_t last_page;
     /* last ram version we have seen */
     uint32_t last_version;
-    /* We are in the first round */
-    bool ram_bulk_stage;
-    /* The free page optimization is enabled */
-    bool fpo_enabled;
     /* How many times we have dirty too many pages */
     int dirty_rate_high_cnt;
     /* these variables are used for bitmap sync */
@@ -330,6 +326,8 @@ struct RAMState {
     uint64_t xbzrle_pages_prev;
     /* Amount of xbzrle encoded bytes since the beginning of the period */
     uint64_t xbzrle_bytes_prev;
+    /* Start using XBZRLE (e.g., after the first round). */
+    bool xbzrle_enabled;

     /* compression statistics since the beginning of the period */
     /* amount of count that no free thread to compress data */
@@ -383,15 +381,6 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp)
     return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
 }

-void precopy_enable_free_page_optimization(void)
-{
-    if (!ram_state) {
-        return;
-    }
-
-    ram_state->fpo_enabled = true;
-}
-
 uint64_t ram_bytes_remaining(void)
 {
     return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
@@ -664,7 +653,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
  */
 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
 {
-    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
+    if (!rs->xbzrle_enabled) {
         return;
     }

@@ -792,23 +781,12 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
 {
     unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
     unsigned long *bitmap = rb->bmap;
-    unsigned long next;

     if (ramblock_is_ignored(rb)) {
         return size;
     }

-    /*
-     * When the free page optimization is enabled, we need to check the bitmap
-     * to send the non-free pages rather than all the pages in the bulk stage.
-     */
-    if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
-        next = start + 1;
-    } else {
-        next = find_next_bit(bitmap, size, start);
-    }
-
-    return next;
+    return find_next_bit(bitmap, size, start);
 }

 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
@@ -1185,8 +1163,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
     trace_ram_save_page(block->idstr, (uint64_t)offset, p);

     XBZRLE_cache_lock();
-    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
-        migrate_use_xbzrle()) {
+    if (rs->xbzrle_enabled && !migration_in_postcopy()) {
         pages = save_xbzrle_page(rs, &p, current_addr, block,
                                  offset, last_stage);
         if (!last_stage) {
@@ -1365,8 +1342,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
         *again = false;
         return false;
     }
-    if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
-        >= pss->block->used_length) {
+    if (!offset_in_ramblock(pss->block,
+                            ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
         /* Didn't find anything in this RAM Block */
         pss->page = 0;
         pss->block = QLIST_NEXT_RCU(pss->block, next);
@@ -1386,7 +1363,10 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
             /* Flag that we've looped */
             pss->complete_round = true;
-            rs->ram_bulk_stage = false;
+            /* After the first round, enable XBZRLE. */
+            if (migrate_use_xbzrle()) {
+                rs->xbzrle_enabled = true;
+            }
         }
         /* Didn't find anything this time, but try again on the new block */
         *again = true;
@@ -1801,14 +1781,6 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)

     if (block) {
         /*
-         * As soon as we start servicing pages out of order, then we have
-         * to kill the bulk stage, since the bulk stage assumes
-         * in (migration_bitmap_find_and_reset_dirty) that every page is
-         * dirty, that's no longer true.
-         */
-        rs->ram_bulk_stage = false;
-
-        /*
          * We want the background search to continue from the queued page
          * since the guest is likely to want other pages near to the page
          * it just requested.
@@ -1891,7 +1863,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
         rs->last_req_rb = ramblock;
     }
     trace_ram_save_queue_pages(ramblock->idstr, start, len);
-    if (start + len > ramblock->used_length) {
+    if (!offset_in_ramblock(ramblock, start + len - 1)) {
         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                      __func__, start, len, ramblock->used_length);
@@ -1920,15 +1892,15 @@ static bool save_page_use_compression(RAMState *rs)
     }

     /*
-     * If xbzrle is on, stop using the data compression after first
-     * round of migration even if compression is enabled. In theory,
-     * xbzrle can do better than compression.
+     * If xbzrle is enabled (e.g., after first round of migration), stop
+     * using the data compression. In theory, xbzrle can do better than
+     * compression.
      */
-    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
-        return true;
+    if (rs->xbzrle_enabled) {
+        return false;
     }

-    return false;
+    return true;
 }

 /*
@@ -2041,6 +2013,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
     int tmppages, pages = 0;
     size_t pagesize_bits =
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
+    unsigned long hostpage_boundary =
+        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
     unsigned long start_page = pss->page;
     int res;

@@ -2051,25 +2025,27 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,

     do {
         /* Check the pages is dirty and if it is send it */
-        if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
-            pss->page++;
-            continue;
-        }
+        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
+            tmppages = ram_save_target_page(rs, pss, last_stage);
+            if (tmppages < 0) {
+                return tmppages;
+            }

-        tmppages = ram_save_target_page(rs, pss, last_stage);
-        if (tmppages < 0) {
-            return tmppages;
+            pages += tmppages;
+            /*
+             * Allow rate limiting to happen in the middle of huge pages if
+             * something is sent in the current iteration.
+             */
+            if (pagesize_bits > 1 && tmppages > 0) {
+                migration_rate_limit();
+            }
         }
-
-        pages += tmppages;
-        pss->page++;
-        /* Allow rate limiting to happen in the middle of huge pages */
-        migration_rate_limit();
-    } while ((pss->page & (pagesize_bits - 1)) &&
+        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
+    } while ((pss->page < hostpage_boundary) &&
              offset_in_ramblock(pss->block,
                                 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
-    /* The offset we leave with is the last one we looked at */
-    pss->page--;
+    /* The offset we leave with is the min boundary of host page and block */
+    pss->page = MIN(pss->page, hostpage_boundary) - 1;

     res = ram_save_release_protection(rs, pss, start_page);
     return (res < 0 ? res : pages);
@@ -2235,8 +2211,7 @@ static void ram_state_reset(RAMState *rs)
     rs->last_sent_block = NULL;
     rs->last_page = 0;
     rs->last_version = ram_list.version;
-    rs->ram_bulk_stage = true;
-    rs->fpo_enabled = false;
+    rs->xbzrle_enabled = false;
 }

 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2720,15 +2695,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
     /* This may not be aligned with current bitmaps. Recalculate. */
     rs->migration_dirty_pages = pages;

-    rs->last_seen_block = NULL;
-    rs->last_sent_block = NULL;
-    rs->last_page = 0;
-    rs->last_version = ram_list.version;
-    /*
-     * Disable the bulk stage, otherwise we'll resend the whole RAM no
-     * matter what we have sent.
-     */
-    rs->ram_bulk_stage = false;
+    ram_state_reset(rs);

     /* Update RAMState cache of output QEMUFile */
     rs->f = out;
@@ -3118,6 +3085,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
     return block->host + offset;
 }

+static void *host_page_from_ram_block_offset(RAMBlock *block,
+                                             ram_addr_t offset)
+{
+    /* Note: Explicitly no check against offset_in_ramblock(). */
+    return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
+                                   block->page_size);
+}
+
+static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
+                                                         ram_addr_t offset)
+{
+    return ((uintptr_t)block->host + offset) & (block->page_size - 1);
+}
+
 static inline void *colo_cache_from_block_offset(RAMBlock *block,
                                                  ram_addr_t offset, bool record_bitmap)
 {
@@ -3345,16 +3326,9 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
     }
 }

-/*
- * we must set ram_bulk_stage to false, otherwise in
- * migation_bitmap_find_dirty the bitmap will be unused and
- * all the pages in ram cache wil be flushed to the ram of
- * secondary VM.
- */
 static void colo_init_ram_state(void)
 {
     ram_state_init(&ram_state);
-    ram_state->ram_bulk_stage = false;
 }

 /*
@@ -3521,13 +3495,12 @@ static int ram_load_postcopy(QEMUFile *f)
     MigrationIncomingState *mis = migration_incoming_get_current();
     /* Temporary page that is later 'placed' */
     void *postcopy_host_page = mis->postcopy_tmp_page;
-    void *this_host = NULL;
+    void *host_page = NULL;
     bool all_zero = true;
     int target_pages = 0;

     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
         ram_addr_t addr;
-        void *host = NULL;
         void *page_buffer = NULL;
         void *place_source = NULL;
         RAMBlock *block = NULL;
@@ -3552,9 +3525,18 @@ static int ram_load_postcopy(QEMUFile *f)
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                      RAM_SAVE_FLAG_COMPRESS_PAGE)) {
             block = ram_block_from_stream(f, flags);
+            if (!block) {
+                ret = -EINVAL;
+                break;
+            }

-            host = host_from_ram_block_offset(block, addr);
-            if (!host) {
+            /*
+             * Relying on used_length is racy and can result in false positives.
+             * We might place pages beyond used_length in case RAM was shrunk
+             * while in postcopy, which is fine - trying to place via
+             * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
+             */
+            if (!block->host || addr >= block->postcopy_length) {
                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                 ret = -EINVAL;
                 break;
             }
@@ -3572,19 +3554,17 @@ static int ram_load_postcopy(QEMUFile *f)
              * of a host page in one chunk.
              */
             page_buffer = postcopy_host_page +
-                          ((uintptr_t)host & (block->page_size - 1));
+                          host_page_offset_from_ram_block_offset(block, addr);
+
             /* If all TP are zero then we can optimise the place */
             if (target_pages == 1) {
-                this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
-                                                    block->page_size);
-            } else {
+                host_page = host_page_from_ram_block_offset(block, addr);
+            } else if (host_page != host_page_from_ram_block_offset(block,
+                                                                    addr)) {
                 /* not the 1st TP within the HP */
-                if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
-                    (uintptr_t)this_host) {
-                    error_report("Non-same host page %p/%p",
-                                 host, this_host);
-                    ret = -EINVAL;
-                    break;
-                }
+                error_report("Non-same host page %p/%p", host_page,
+                             host_page_from_ram_block_offset(block, addr));
+                ret = -EINVAL;
+                break;
             }
             /*
@@ -3663,16 +3643,11 @@ static int ram_load_postcopy(QEMUFile *f)
         }

         if (!ret && place_needed) {
-            /* This gets called at the last target page in the host page */
-            void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
-                                                       block->page_size);
-
             if (all_zero) {
-                ret = postcopy_place_page_zero(mis, place_dest,
-                                               block);
+                ret = postcopy_place_page_zero(mis, host_page, block);
             } else {
-                ret = postcopy_place_page(mis, place_dest,
-                                          place_source, block);
+                ret = postcopy_place_page(mis, host_page, place_source,
+                                          block);
             }
             place_needed = false;
             target_pages = 0;
@@ -3721,8 +3696,8 @@ void colo_flush_ram_cache(void)
         while (block) {
             offset = migration_bitmap_find_dirty(ram_state, block, offset);

-            if (((ram_addr_t)offset) << TARGET_PAGE_BITS
-                >= block->used_length) {
+            if (!offset_in_ramblock(block,
+                                    ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
                 offset = 0;
                 block = QLIST_NEXT_RCU(block, next);
             } else {
@@ -4136,8 +4111,69 @@ static SaveVMHandlers savevm_ram_handlers = {
     .resume_prepare = ram_resume_prepare,
 };

+static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
+                                      size_t old_size, size_t new_size)
+{
+    PostcopyState ps = postcopy_state_get();
+    ram_addr_t offset;
+    RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
+    Error *err = NULL;
+
+    if (ramblock_is_ignored(rb)) {
+        return;
+    }
+
+    if (!migration_is_idle()) {
+        /*
+         * Precopy code on the source cannot deal with the size of RAM blocks
+         * changing at random points in time - especially after sending the
+         * RAM block sizes in the migration stream, they must no longer change.
+         * Abort and indicate a proper reason.
+         */
+        error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
+        migrate_set_error(migrate_get_current(), err);
+        error_free(err);
+        migration_cancel();
+    }
+
+    switch (ps) {
+    case POSTCOPY_INCOMING_ADVISE:
+        /*
+         * Update what ram_postcopy_incoming_init()->init_range() does at the
+         * time postcopy was advised. Syncing RAM blocks with the source will
+         * result in RAM resizes.
+         */
+        if (old_size < new_size) {
+            if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
+                error_report("RAM block '%s' discard of resized RAM failed",
+                             rb->idstr);
+            }
+        }
+        rb->postcopy_length = new_size;
+        break;
+    case POSTCOPY_INCOMING_NONE:
+    case POSTCOPY_INCOMING_RUNNING:
+    case POSTCOPY_INCOMING_END:
+        /*
+         * Once our guest is running, postcopy does no longer care about
+         * resizes. When growing, the new memory was not available on the
+         * source, no handler needed.
+         */
+        break;
+    default:
+        error_report("RAM block '%s' resized during postcopy state: %d",
+                     rb->idstr, ps);
+        exit(-1);
+    }
+}
+
+static RAMBlockNotifier ram_mig_ram_notifier = {
+    .ram_block_resized = ram_mig_ram_block_resized,
+};
+
 void ram_mig_init(void)
 {
     qemu_mutex_init(&XBZRLE.lock);
     register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
+    ram_block_notifier_add(&ram_mig_ram_notifier);
 }
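
The ram.c hunk above hooks migration into RAM block resize events through a RAMBlockNotifier registered in ram_mig_init(). The following is a minimal standalone sketch of that notifier pattern in plain C; the names here (BlockNotifier, block_notifier_add, block_resized, mig_block_resized) are illustrative stand-ins, not QEMU APIs, and the "cancel migration" reaction only mirrors the idea of the patch rather than its actual implementation.

#include <stdio.h>
#include <stddef.h>

/* Simplified notifier: subscribers get a callback when a block is resized. */
typedef struct BlockNotifier BlockNotifier;
struct BlockNotifier {
    void (*resized)(BlockNotifier *n, const char *idstr,
                    size_t old_size, size_t new_size);
    BlockNotifier *next;
};

static BlockNotifier *notifiers;

static void block_notifier_add(BlockNotifier *n)
{
    n->next = notifiers;
    notifiers = n;
}

/* The resize path walks the list and fires every registered callback. */
static void block_resized(const char *idstr, size_t old_size, size_t new_size)
{
    for (BlockNotifier *n = notifiers; n; n = n->next) {
        n->resized(n, idstr, old_size, new_size);
    }
}

/* Migration-style subscriber: a resize while "migration" runs is rejected. */
static int migration_active = 1;

static void mig_block_resized(BlockNotifier *n, const char *idstr,
                              size_t old_size, size_t new_size)
{
    (void)n;
    if (migration_active) {
        printf("block '%s' resized %zu -> %zu: cancel migration\n",
               idstr, old_size, new_size);
    }
}

static BlockNotifier mig_notifier = { .resized = mig_block_resized };

int main(void)
{
    block_notifier_add(&mig_notifier);
    block_resized("pc.ram", (size_t)1 << 20, (size_t)2 << 20);
    return 0;
}

The design point is the same as in the patch: the resize code does not need to know about migration at all; migration simply subscribes, cancels an active precopy on the source, and updates postcopy_length on the destination.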