Diffstat (limited to 'migration')
 migration/migration.c    |  38
 migration/migration.h    |   1
 migration/multifd.c      |   2
 migration/postcopy-ram.c |  15
 migration/ram.c          | 246
 5 files changed, 175 insertions(+), 127 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 8ca034136b..4698b47442 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -223,13 +223,18 @@ void migration_object_init(void)
dirty_bitmap_mig_init();
}
+void migration_cancel(void)
+{
+ migrate_fd_cancel(current_migration);
+}
+
void migration_shutdown(void)
{
/*
* Cancel the current migration - that will (eventually)
* stop the migration using this structure
*/
- migrate_fd_cancel(current_migration);
+ migration_cancel();
object_unref(OBJECT(current_migration));
/*
@@ -1073,27 +1078,24 @@ static void populate_vfio_info(MigrationInfo *info)
static void fill_source_migration_info(MigrationInfo *info)
{
MigrationState *s = migrate_get_current();
+ GSList *cur_blocker = migration_blockers;
- info->blocked = migration_is_blocked(NULL);
- info->has_blocked_reasons = info->blocked;
info->blocked_reasons = NULL;
- if (info->blocked) {
- GSList *cur_blocker = migration_blockers;
- /*
- * There are two types of reasons a migration might be blocked;
- * a) devices marked in VMState as non-migratable, and
- * b) Explicit migration blockers
- * We need to add both of them here.
- */
- qemu_savevm_non_migratable_list(&info->blocked_reasons);
+ /*
+ * There are two types of reasons a migration might be blocked;
+ * a) devices marked in VMState as non-migratable, and
+ * b) Explicit migration blockers
+ * We need to add both of them here.
+ */
+ qemu_savevm_non_migratable_list(&info->blocked_reasons);
- while (cur_blocker) {
- QAPI_LIST_PREPEND(info->blocked_reasons,
- g_strdup(error_get_pretty(cur_blocker->data)));
- cur_blocker = g_slist_next(cur_blocker);
- }
+ while (cur_blocker) {
+ QAPI_LIST_PREPEND(info->blocked_reasons,
+ g_strdup(error_get_pretty(cur_blocker->data)));
+ cur_blocker = g_slist_next(cur_blocker);
}
+ info->has_blocked_reasons = info->blocked_reasons != NULL;
switch (s->state) {
case MIGRATION_STATUS_NONE:
@@ -2310,7 +2312,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
void qmp_migrate_cancel(Error **errp)
{
- migrate_fd_cancel(migrate_get_current());
+ migration_cancel();
}
void qmp_migrate_continue(MigrationStatus state, Error **errp)
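
The fill_source_migration_info() rewrite above also changes how has_blocked_reasons is computed: it is now derived from the assembled list itself rather than from a separate migration_is_blocked() probe, so the flag can never disagree with the data. A minimal sketch of that invariant, using a hypothetical list type in place of GSList:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical singly-linked reason list standing in for GSList. */
struct reason {
    struct reason *next;
    const char *text;
};

/* Derive the presence flag from the data instead of tracking it
 * separately; an empty list and has_reasons == true can never coexist. */
static bool has_reasons(const struct reason *list)
{
    return list != NULL;
}
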
diff --git a/migration/migration.h b/migration/migration.h
index db6708326b..f7b388d718 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -375,5 +375,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
bool migration_rate_limit(void);
+void migration_cancel(void);
#endif
diff --git a/migration/multifd.c b/migration/multifd.c
index a6677c45c8..0a4803cfcc 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -361,7 +361,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
if (offset > (block->used_length - qemu_target_page_size())) {
error_setg(errp, "multifd: offset too long %" PRIu64
" (max " RAM_ADDR_FMT ")",
- offset, block->max_length);
+ offset, block->used_length);
return -1;
}
p->pages->iov[i].iov_base = block->host + offset;
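
The corrected multifd bound rejects any offset whose target page would extend past the block's used length; a standalone restatement of the check, as a hypothetical plain-C helper:

#include <stdbool.h>
#include <stdint.h>

/* A page starting at `offset` fits entirely below `used_length` iff
 * offset <= used_length - page_size; guard the subtraction first. */
static bool page_fits(uint64_t offset, uint64_t used_length,
                      uint64_t page_size)
{
    return used_length >= page_size && offset <= used_length - page_size;
}
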
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index ab482adef1..2e9697bdd2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -17,6 +17,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/rcu.h"
#include "exec/target_page.h"
#include "migration.h"
#include "qemu-file.h"
@@ -30,6 +31,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/boards.h"
+#include "exec/ramblock.h"
/* Arbitrary limit on size of each discard command,
* keeps them around ~200 bytes
@@ -453,6 +455,13 @@ static int init_range(RAMBlock *rb, void *opaque)
trace_postcopy_init_range(block_name, host_addr, offset, length);
/*
+ * Save the used_length before running the guest. In case we have to
+ * resize RAM blocks when syncing RAM block sizes from the source during
+ * precopy, we'll update it manually via the ram block notifier.
+ */
+ rb->postcopy_length = length;
+
+ /*
* We need the whole of RAM to be truly empty for postcopy, so things
* like ROMs and any data tables built during init must be zero'd
* - we're going to get the copy from the source anyway.
@@ -474,7 +483,7 @@ static int cleanup_range(RAMBlock *rb, void *opaque)
const char *block_name = qemu_ram_get_idstr(rb);
void *host_addr = qemu_ram_get_host_addr(rb);
ram_addr_t offset = qemu_ram_get_offset(rb);
- ram_addr_t length = qemu_ram_get_used_length(rb);
+ ram_addr_t length = rb->postcopy_length;
MigrationIncomingState *mis = opaque;
struct uffdio_range range_struct;
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
@@ -580,7 +589,7 @@ static int nhp_range(RAMBlock *rb, void *opaque)
const char *block_name = qemu_ram_get_idstr(rb);
void *host_addr = qemu_ram_get_host_addr(rb);
ram_addr_t offset = qemu_ram_get_offset(rb);
- ram_addr_t length = qemu_ram_get_used_length(rb);
+ ram_addr_t length = rb->postcopy_length;
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
/*
@@ -624,7 +633,7 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
struct uffdio_register reg_struct;
reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
- reg_struct.range.len = qemu_ram_get_used_length(rb);
+ reg_struct.range.len = rb->postcopy_length;
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
/* Now tell our userfault_fd that it's responsible for this area */
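
For context, ram_block_enable_notify() feeds the snapshotted postcopy_length into UFFDIO_REGISTER. A self-contained sketch of that Linux userfaultfd call, using the standard kernel API rather than QEMU's wrappers:

#include <linux/userfaultfd.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Register [start, start + len) for missing-page events on uffd;
 * returns 0 on success, -1 with errno set on failure. */
static int register_missing_range(int uffd, void *start, uint64_t len)
{
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t)start, .len = len },
        .mode  = UFFDIO_REGISTER_MODE_MISSING,
    };
    return ioctl(uffd, UFFDIO_REGISTER, &reg);
}
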
diff --git a/migration/ram.c b/migration/ram.c
index ace8ad431c..60ea913c54 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -240,7 +240,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
return -1;
}
- nbits = block->used_length >> TARGET_PAGE_BITS;
+ nbits = block->postcopy_length >> TARGET_PAGE_BITS;
/*
* Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
@@ -311,10 +311,6 @@ struct RAMState {
ram_addr_t last_page;
/* last ram version we have seen */
uint32_t last_version;
- /* We are in the first round */
- bool ram_bulk_stage;
- /* The free page optimization is enabled */
- bool fpo_enabled;
/* How many times we have dirty too many pages */
int dirty_rate_high_cnt;
/* these variables are used for bitmap sync */
@@ -330,6 +326,8 @@ struct RAMState {
uint64_t xbzrle_pages_prev;
/* Amount of xbzrle encoded bytes since the beginning of the period */
uint64_t xbzrle_bytes_prev;
+ /* Start using XBZRLE (e.g., after the first round). */
+ bool xbzrle_enabled;
/* compression statistics since the beginning of the period */
/* amount of count that no free thread to compress data */
@@ -383,15 +381,6 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp)
return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}
-void precopy_enable_free_page_optimization(void)
-{
- if (!ram_state) {
- return;
- }
-
- ram_state->fpo_enabled = true;
-}
-
uint64_t ram_bytes_remaining(void)
{
return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
@@ -664,7 +653,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
*/
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
- if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
+ if (!rs->xbzrle_enabled) {
return;
}
@@ -792,23 +781,12 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
{
unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
unsigned long *bitmap = rb->bmap;
- unsigned long next;
if (ramblock_is_ignored(rb)) {
return size;
}
- /*
- * When the free page optimization is enabled, we need to check the bitmap
- * to send the non-free pages rather than all the pages in the bulk stage.
- */
- if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
- next = start + 1;
- } else {
- next = find_next_bit(bitmap, size, start);
- }
-
- return next;
+ return find_next_bit(bitmap, size, start);
}
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
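
The simplified migration_bitmap_find_dirty() now leans entirely on the find_next_bit() contract: return the index of the first set bit at or after start, or size when the remainder of the bitmap is clear. An illustrative plain-C equivalent of that contract (not QEMU's optimized implementation):

#include <limits.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

/* First set bit >= start, or `size` if the rest of the bitmap is clear. */
static unsigned long next_set_bit(const unsigned long *bmap,
                                  unsigned long size, unsigned long start)
{
    for (unsigned long i = start; i < size; i++) {
        if (bmap[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))) {
            return i;
        }
    }
    return size;
}
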
@@ -1185,8 +1163,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
trace_ram_save_page(block->idstr, (uint64_t)offset, p);
XBZRLE_cache_lock();
- if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
- migrate_use_xbzrle()) {
+ if (rs->xbzrle_enabled && !migration_in_postcopy()) {
pages = save_xbzrle_page(rs, &p, current_addr, block,
offset, last_stage);
if (!last_stage) {
@@ -1365,8 +1342,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
*again = false;
return false;
}
- if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
- >= pss->block->used_length) {
+ if (!offset_in_ramblock(pss->block,
+ ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
/* Didn't find anything in this RAM Block */
pss->page = 0;
pss->block = QLIST_NEXT_RCU(pss->block, next);
@@ -1386,7 +1363,10 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
/* Flag that we've looped */
pss->complete_round = true;
- rs->ram_bulk_stage = false;
+ /* After the first round, enable XBZRLE. */
+ if (migrate_use_xbzrle()) {
+ rs->xbzrle_enabled = true;
+ }
}
/* Didn't find anything this time, but try again on the new block */
*again = true;
@@ -1801,14 +1781,6 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
if (block) {
/*
- * As soon as we start servicing pages out of order, then we have
- * to kill the bulk stage, since the bulk stage assumes
- * in (migration_bitmap_find_and_reset_dirty) that every page is
- * dirty, that's no longer true.
- */
- rs->ram_bulk_stage = false;
-
- /*
* We want the background search to continue from the queued page
* since the guest is likely to want other pages near to the page
* it just requested.
@@ -1891,7 +1863,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
rs->last_req_rb = ramblock;
}
trace_ram_save_queue_pages(ramblock->idstr, start, len);
- if (start + len > ramblock->used_length) {
+ if (!offset_in_ramblock(ramblock, start + len - 1)) {
error_report("%s request overrun start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
__func__, start, len, ramblock->used_length);
@@ -1920,15 +1892,15 @@ static bool save_page_use_compression(RAMState *rs)
}
/*
- * If xbzrle is on, stop using the data compression after first
- * round of migration even if compression is enabled. In theory,
- * xbzrle can do better than compression.
+ * If xbzrle is enabled (e.g., after first round of migration), stop
+ * using the data compression. In theory, xbzrle can do better than
+ * compression.
*/
- if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
- return true;
+ if (rs->xbzrle_enabled) {
+ return false;
}
- return false;
+ return true;
}
/*
@@ -2041,6 +2013,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
int tmppages, pages = 0;
size_t pagesize_bits =
qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
+ unsigned long hostpage_boundary =
+ QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
unsigned long start_page = pss->page;
int res;
@@ -2051,25 +2025,27 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
do {
/* Check if the page is dirty, and if it is, send it */
- if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
- pss->page++;
- continue;
- }
+ if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
+ tmppages = ram_save_target_page(rs, pss, last_stage);
+ if (tmppages < 0) {
+ return tmppages;
+ }
- tmppages = ram_save_target_page(rs, pss, last_stage);
- if (tmppages < 0) {
- return tmppages;
+ pages += tmppages;
+ /*
+ * Allow rate limiting to happen in the middle of huge pages if
+ * something is sent in the current iteration.
+ */
+ if (pagesize_bits > 1 && tmppages > 0) {
+ migration_rate_limit();
+ }
}
-
- pages += tmppages;
- pss->page++;
- /* Allow rate limiting to happen in the middle of huge pages */
- migration_rate_limit();
- } while ((pss->page & (pagesize_bits - 1)) &&
+ pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
+ } while ((pss->page < hostpage_boundary) &&
offset_in_ramblock(pss->block,
((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
- /* The offset we leave with is the last one we looked at */
- pss->page--;
+ /* The offset we leave with is the min boundary of host page and block */
+ pss->page = MIN(pss->page, hostpage_boundary) - 1;
res = ram_save_release_protection(rs, pss, start_page);
return (res < 0 ? res : pages);
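
The new hostpage_boundary caps the scan at the end of the current host page. Concretely, with 4 KiB target pages inside a 2 MiB huge page, pagesize_bits is 512; a standalone check of the rounding, with QEMU_ALIGN_UP inlined under the assumption of its usual round-up-to-multiple definition:

#include <assert.h>

/* Round n up to the next multiple of a, like QEMU_ALIGN_UP(n, a). */
#define ALIGN_UP(n, a) (((n) + (a) - 1) / (a) * (a))

int main(void)
{
    unsigned long pagesize_bits = 512;          /* 2 MiB / 4 KiB */
    unsigned long boundary = ALIGN_UP(1000 + 1, pagesize_bits);

    /* Starting from target page 1000, the loop may advance up to page
     * 1023; page 1024 already belongs to the next host page. */
    assert(boundary == 1024);
    return 0;
}
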
@@ -2235,8 +2211,7 @@ static void ram_state_reset(RAMState *rs)
rs->last_sent_block = NULL;
rs->last_page = 0;
rs->last_version = ram_list.version;
- rs->ram_bulk_stage = true;
- rs->fpo_enabled = false;
+ rs->xbzrle_enabled = false;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2720,15 +2695,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
/* This may not be aligned with current bitmaps. Recalculate. */
rs->migration_dirty_pages = pages;
- rs->last_seen_block = NULL;
- rs->last_sent_block = NULL;
- rs->last_page = 0;
- rs->last_version = ram_list.version;
- /*
- * Disable the bulk stage, otherwise we'll resend the whole RAM no
- * matter what we have sent.
- */
- rs->ram_bulk_stage = false;
+ ram_state_reset(rs);
/* Update RAMState cache of output QEMUFile */
rs->f = out;
@@ -3118,6 +3085,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
return block->host + offset;
}
+static void *host_page_from_ram_block_offset(RAMBlock *block,
+ ram_addr_t offset)
+{
+ /* Note: Explicitly no check against offset_in_ramblock(). */
+ return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
+ block->page_size);
+}
+
+static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
+ ram_addr_t offset)
+{
+ return ((uintptr_t)block->host + offset) & (block->page_size - 1);
+}
+
static inline void *colo_cache_from_block_offset(RAMBlock *block,
ram_addr_t offset, bool record_bitmap)
{
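
A numeric illustration of the two helpers above, with made-up addresses and the block's host mapping assumed huge-page aligned:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t host = 0x7f0000000000;     /* hypothetical mapping address */
    uint64_t page_size = 2ull << 20;    /* 2 MiB backing pages */
    uint64_t offset = 0x201000;

    uint64_t host_page = (host + offset) & ~(page_size - 1); /* align down */
    uint64_t in_page   = (host + offset) & (page_size - 1);

    assert(host_page == 0x7f0000200000 && in_page == 0x1000);
    return 0;
}
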
@@ -3345,16 +3326,9 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
}
}
- /*
- * we must set ram_bulk_stage to false, otherwise in
- * migation_bitmap_find_dirty the bitmap will be unused and
- * all the pages in ram cache wil be flushed to the ram of
- * secondary VM.
- */
static void colo_init_ram_state(void)
{
ram_state_init(&ram_state);
- ram_state->ram_bulk_stage = false;
}
/*
@@ -3521,13 +3495,12 @@ static int ram_load_postcopy(QEMUFile *f)
MigrationIncomingState *mis = migration_incoming_get_current();
/* Temporary page that is later 'placed' */
void *postcopy_host_page = mis->postcopy_tmp_page;
- void *this_host = NULL;
+ void *host_page = NULL;
bool all_zero = true;
int target_pages = 0;
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr;
- void *host = NULL;
void *page_buffer = NULL;
void *place_source = NULL;
RAMBlock *block = NULL;
@@ -3552,9 +3525,18 @@ static int ram_load_postcopy(QEMUFile *f)
if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
RAM_SAVE_FLAG_COMPRESS_PAGE)) {
block = ram_block_from_stream(f, flags);
+ if (!block) {
+ ret = -EINVAL;
+ break;
+ }
- host = host_from_ram_block_offset(block, addr);
- if (!host) {
+ /*
+ * Relying on used_length is racy and can result in false positives.
+ * We might place pages beyond used_length in case RAM was shrunk
+ * while in postcopy, which is fine - trying to place via
+ * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
+ */
+ if (!block->host || addr >= block->postcopy_length) {
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
ret = -EINVAL;
break;
@@ -3572,19 +3554,17 @@ static int ram_load_postcopy(QEMUFile *f)
* of a host page in one chunk.
*/
page_buffer = postcopy_host_page +
- ((uintptr_t)host & (block->page_size - 1));
+ host_page_offset_from_ram_block_offset(block, addr);
+ /* If all TP are zero then we can optimise the place */
if (target_pages == 1) {
- this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
- block->page_size);
- } else {
+ host_page = host_page_from_ram_block_offset(block, addr);
+ } else if (host_page != host_page_from_ram_block_offset(block,
+ addr)) {
/* not the 1st TP within the HP */
- if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
- (uintptr_t)this_host) {
- error_report("Non-same host page %p/%p",
- host, this_host);
- ret = -EINVAL;
- break;
- }
+ error_report("Non-same host page %p/%p", host_page,
+ host_page_from_ram_block_offset(block, addr));
+ ret = -EINVAL;
+ break;
}
/*
@@ -3663,16 +3643,11 @@ static int ram_load_postcopy(QEMUFile *f)
}
if (!ret && place_needed) {
- /* This gets called at the last target page in the host page */
- void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
- block->page_size);
-
if (all_zero) {
- ret = postcopy_place_page_zero(mis, place_dest,
- block);
+ ret = postcopy_place_page_zero(mis, host_page, block);
} else {
- ret = postcopy_place_page(mis, place_dest,
- place_source, block);
+ ret = postcopy_place_page(mis, host_page, place_source,
+ block);
}
place_needed = false;
target_pages = 0;
@@ -3721,8 +3696,8 @@ void colo_flush_ram_cache(void)
while (block) {
offset = migration_bitmap_find_dirty(ram_state, block, offset);
- if (((ram_addr_t)offset) << TARGET_PAGE_BITS
- >= block->used_length) {
+ if (!offset_in_ramblock(block,
+ ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
offset = 0;
block = QLIST_NEXT_RCU(block, next);
} else {
@@ -4136,8 +4111,69 @@ static SaveVMHandlers savevm_ram_handlers = {
.resume_prepare = ram_resume_prepare,
};
+static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
+ size_t old_size, size_t new_size)
+{
+ PostcopyState ps = postcopy_state_get();
+ ram_addr_t offset;
+ RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
+ Error *err = NULL;
+
+ if (ramblock_is_ignored(rb)) {
+ return;
+ }
+
+ if (!migration_is_idle()) {
+ /*
+ * Precopy code on the source cannot deal with the size of RAM blocks
+ * changing at random points in time - especially after sending the
+ * RAM block sizes in the migration stream, they must no longer change.
+ * Abort and indicate a proper reason.
+ */
+ error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
+ migrate_set_error(migrate_get_current(), err);
+ error_free(err);
+ migration_cancel();
+ }
+
+ switch (ps) {
+ case POSTCOPY_INCOMING_ADVISE:
+ /*
+ * Update what ram_postcopy_incoming_init()->init_range() does at the
+ * time postcopy was advised. Syncing RAM blocks with the source will
+ * result in RAM resizes.
+ */
+ if (old_size < new_size) {
+ if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
+ error_report("RAM block '%s' discard of resized RAM failed",
+ rb->idstr);
+ }
+ }
+ rb->postcopy_length = new_size;
+ break;
+ case POSTCOPY_INCOMING_NONE:
+ case POSTCOPY_INCOMING_RUNNING:
+ case POSTCOPY_INCOMING_END:
+ /*
+ * Once our guest is running, postcopy no longer cares about
+ * resizes. When growing, the new memory was not available on the
+ * source, so no handler is needed.
+ */
+ break;
+ default:
+ error_report("RAM block '%s' resized during postcopy state: %d",
+ rb->idstr, ps);
+ exit(-1);
+ }
+}
+
+static RAMBlockNotifier ram_mig_ram_notifier = {
+ .ram_block_resized = ram_mig_ram_block_resized,
+};
+
void ram_mig_init(void)
{
qemu_mutex_init(&XBZRLE.lock);
register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
+ ram_block_notifier_add(&ram_mig_ram_notifier);
}
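
ram_mig_init() now registers the notifier at startup, so every RAM block resize is seen by migration code. The RAMBlockNotifier mechanism itself is a plain observer list; a minimal self-contained sketch of that pattern, with hypothetical names rather than QEMU's actual API:

#include <stddef.h>

/* Hypothetical, simplified observer list mirroring RAMBlockNotifier. */
typedef struct ResizeNotifier ResizeNotifier;
struct ResizeNotifier {
    void (*ram_block_resized)(ResizeNotifier *n, void *host,
                              size_t old_size, size_t new_size);
    ResizeNotifier *next;
};

static ResizeNotifier *notifiers;

static void notifier_add(ResizeNotifier *n)
{
    n->next = notifiers;
    notifiers = n;
}

/* Called from the RAM resize path; fans out to every registered hook. */
static void notify_resized(void *host, size_t old_size, size_t new_size)
{
    for (ResizeNotifier *n = notifiers; n; n = n->next) {
        n->ram_block_resized(n, host, old_size, new_size);
    }
}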