aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hmp-commands.hx12
-rw-r--r--hmp.c13
-rw-r--r--hmp.h1
-rw-r--r--include/exec/ram_addr.h10
-rw-r--r--migration/migration.c129
-rw-r--r--migration/migration.h11
-rw-r--r--migration/page_cache.c25
-rw-r--r--migration/page_cache.h7
-rw-r--r--migration/postcopy-ram.c54
-rw-r--r--migration/postcopy-ram.h4
-rw-r--r--migration/ram.c259
-rw-r--r--migration/ram.h7
-rw-r--r--migration/tls.c1
-rw-r--r--qapi/migration.json36
14 files changed, 431 insertions, 138 deletions
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 1941e19932..4afd57cf5f 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -959,7 +959,19 @@ STEXI
@item migrate_cancel
@findex migrate_cancel
Cancel the current VM migration.
+ETEXI
+ {
+ .name = "migrate_continue",
+ .args_type = "state:s",
+ .params = "state",
+ .help = "Continue migration from the given paused state",
+ .cmd = hmp_migrate_continue,
+ },
+STEXI
+@item migrate_continue @var{state}
+@findex migrate_continue
+Continue migration from the paused state @var{state}.
ETEXI
{
diff --git a/hmp.c b/hmp.c
index ec61329ebb..41fcce6f5a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1495,6 +1495,19 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
qmp_migrate_cancel(NULL);
}
+void hmp_migrate_continue(Monitor *mon, const QDict *qdict)
+{
+ Error *err = NULL;
+ const char *state = qdict_get_str(qdict, "state");
+ int val = qapi_enum_parse(&MigrationStatus_lookup, state, -1, &err);
+
+ if (val >= 0) {
+ qmp_migrate_continue(val, &err);
+ }
+
+ hmp_handle_error(mon, &err);
+}
+
void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
diff --git a/hmp.h b/hmp.h
index 3605003e4c..a6f56b1f29 100644
--- a/hmp.h
+++ b/hmp.h
@@ -68,6 +68,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict);
void hmp_delvm(Monitor *mon, const QDict *qdict);
void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
+void hmp_migrate_continue(Monitor *mon, const QDict *qdict);
void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index d017639f7e..6cbc02aa0f 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -47,6 +47,8 @@ struct RAMBlock {
* of the postcopy phase
*/
unsigned long *unsentmap;
+ /* bitmap of already received pages in postcopy */
+ unsigned long *receivedmap;
};
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
return (char *)block->host + offset;
}
+static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
+ RAMBlock *rb)
+{
+ uint64_t host_addr_offset =
+ (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
+ return host_addr_offset >> TARGET_PAGE_BITS;
+}
+
long qemu_getrampagesize(void);
unsigned long last_ram_page(void);
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
diff --git a/migration/migration.c b/migration/migration.c
index 98429dc843..62761d5705 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -104,6 +104,9 @@ enum mig_rp_message_type {
static MigrationState *current_migration;
static bool migration_object_check(MigrationState *ms, Error **errp);
+static int migration_maybe_pause(MigrationState *s,
+ int *current_active_state,
+ int new_state);
void migration_object_init(void)
{
@@ -526,6 +529,8 @@ static bool migration_is_setup_or_active(int state)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_SETUP:
+ case MIGRATION_STATUS_PRE_SWITCHOVER:
+ case MIGRATION_STATUS_DEVICE:
return true;
default:
@@ -600,6 +605,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_CANCELLING:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+ case MIGRATION_STATUS_PRE_SWITCHOVER:
+ case MIGRATION_STATUS_DEVICE:
/* TODO add some postcopy stats */
info->has_status = true;
info->has_total_time = true;
@@ -865,6 +872,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_block_incremental) {
dest->block_incremental = params->block_incremental;
}
+ if (params->has_x_multifd_channels) {
+ dest->x_multifd_channels = params->x_multifd_channels;
+ }
+ if (params->has_x_multifd_page_count) {
+ dest->x_multifd_page_count = params->x_multifd_page_count;
+ }
}
static void migrate_params_apply(MigrateSetParameters *params)
@@ -1071,19 +1084,30 @@ static void migrate_fd_cleanup(void *opaque)
MIGRATION_STATUS_CANCELLED);
}
+ if (s->error) {
+ /* s->error is still used by info migrate, so report a copy; we can't free it */
+ error_report_err(error_copy(s->error));
+ }
notifier_list_notify(&migration_state_notifiers, s);
block_cleanup_parameters(s);
}
+void migrate_set_error(MigrationState *s, const Error *error)
+{
+ qemu_mutex_lock(&s->error_mutex);
+ if (!s->error) {
+ s->error = error_copy(error);
+ }
+ qemu_mutex_unlock(&s->error_mutex);
+}
+
void migrate_fd_error(MigrationState *s, const Error *error)
{
trace_migrate_fd_error(error_get_pretty(error));
assert(s->to_dst_file == NULL);
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
- if (!s->error) {
- s->error = error_copy(error);
- }
+ migrate_set_error(s, error);
notifier_list_notify(&migration_state_notifiers, s);
block_cleanup_parameters(s);
}
@@ -1104,6 +1128,10 @@ static void migrate_fd_cancel(MigrationState *s)
if (!migration_is_setup_or_active(old_state)) {
break;
}
+ /* If the migration is paused, kick it out of the pause */
+ if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
+ qemu_sem_post(&s->pause_sem);
+ }
migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
} while (s->state != MIGRATION_STATUS_CANCELLING);
@@ -1183,6 +1211,8 @@ bool migration_is_idle(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_COLO:
+ case MIGRATION_STATUS_PRE_SWITCHOVER:
+ case MIGRATION_STATUS_DEVICE:
return false;
case MIGRATION_STATUS__MAX:
g_assert_not_reached();
@@ -1362,29 +1392,24 @@ void qmp_migrate_cancel(Error **errp)
migrate_fd_cancel(migrate_get_current());
}
-void qmp_migrate_set_cache_size(int64_t value, Error **errp)
+void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
MigrationState *s = migrate_get_current();
- int64_t new_size;
-
- /* Check for truncation */
- if (value != (size_t)value) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
- "exceeding address space");
+ if (s->state != state) {
+ error_setg(errp, "Migration not in expected state: %s",
+ MigrationStatus_str(s->state));
return;
}
+ qemu_sem_post(&s->pause_sem);
+}
- /* Cache should not be larger than guest ram size */
- if (value > ram_bytes_total()) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
- "exceeds guest ram size ");
- return;
- }
+void qmp_migrate_set_cache_size(int64_t value, Error **errp)
+{
+ MigrationState *s = migrate_get_current();
+ int64_t new_size;
- new_size = xbzrle_cache_resize(value);
+ new_size = xbzrle_cache_resize(value, errp);
if (new_size < 0) {
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
- "is smaller than page size");
return;
}
@@ -1521,6 +1546,16 @@ bool migrate_use_multifd(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
}
+bool migrate_pause_before_switchover(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[
+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
+}
+
int migrate_multifd_channels(void)
{
MigrationState *s;
@@ -1799,8 +1834,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
QEMUFile *fb;
int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
bool restart_block = false;
- migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_POSTCOPY_ACTIVE);
+ int cur_state = MIGRATION_STATUS_ACTIVE;
+ if (!migrate_pause_before_switchover()) {
+ migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_POSTCOPY_ACTIVE);
+ }
trace_postcopy_start();
qemu_mutex_lock_iothread();
@@ -1814,6 +1852,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
goto fail;
}
+ ret = migration_maybe_pause(ms, &cur_state,
+ MIGRATION_STATUS_POSTCOPY_ACTIVE);
+ if (ret < 0) {
+ goto fail;
+ }
+
ret = bdrv_inactivate_all();
if (ret < 0) {
goto fail;
@@ -1952,6 +1996,41 @@ fail:
}
/**
+ * migration_maybe_pause: Pause if required to by
+ * migrate_pause_before_switchover; called with the iothread locked.
+ * Returns: 0 on success, -EINVAL if the migration did not reach the new state
+ */
+static int migration_maybe_pause(MigrationState *s,
+ int *current_active_state,
+ int new_state)
+{
+ if (!migrate_pause_before_switchover()) {
+ return 0;
+ }
+
+ /* Since leaving this state is not atomic with posting the semaphore
+ * it's possible that someone could have issued multiple migrate_continue
+ * and the semaphore is incorrectly positive at this point;
+ * the docs say it's undefined to reinit a semaphore that's already
+ * init'd, so use timedwait to eat up any existing posts.
+ */
+ while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
+ /* This block intentionally left blank */
+ }
+
+ qemu_mutex_unlock_iothread();
+ migrate_set_state(&s->state, *current_active_state,
+ MIGRATION_STATUS_PRE_SWITCHOVER);
+ qemu_sem_wait(&s->pause_sem);
+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
+ new_state);
+ *current_active_state = new_state;
+ qemu_mutex_lock_iothread();
+
+ return s->state == new_state ? 0 : -EINVAL;
+}
+
+/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
*
@@ -1977,6 +2056,10 @@ static void migration_completion(MigrationState *s, int current_active_state,
bool inactivate = !migrate_colo_enabled();
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
if (ret >= 0) {
+ ret = migration_maybe_pause(s, &current_active_state,
+ MIGRATION_STATUS_DEVICE);
+ }
+ if (ret >= 0) {
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
inactivate);
@@ -2355,8 +2438,10 @@ static void migration_instance_finalize(Object *obj)
MigrationState *ms = MIGRATION_OBJ(obj);
MigrationParameters *params = &ms->parameters;
+ qemu_mutex_destroy(&ms->error_mutex);
g_free(params->tls_hostname);
g_free(params->tls_creds);
+ qemu_sem_destroy(&ms->pause_sem);
}
static void migration_instance_init(Object *obj)
@@ -2367,6 +2452,8 @@ static void migration_instance_init(Object *obj)
ms->state = MIGRATION_STATUS_NONE;
ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
ms->mbps = -1;
+ qemu_sem_init(&ms->pause_sem, 0);
+ qemu_mutex_init(&ms->error_mutex);
params->tls_hostname = g_strdup("");
params->tls_creds = g_strdup("");
diff --git a/migration/migration.h b/migration/migration.h
index b83cceadc4..8ccdd7a577 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -121,6 +121,9 @@ struct MigrationState
/* Flag set once the migration thread called bdrv_inactivate_all */
bool block_inactive;
+ /* Posted to resume a migration paused due to pause-before-switchover */
+ QemuSemaphore pause_sem;
+
/* The semaphore is used to notify COLO thread that failover is finished */
QemuSemaphore colo_exit_sem;
@@ -129,8 +132,12 @@ struct MigrationState
int64_t colo_checkpoint_time;
QEMUTimer *colo_delay_timer;
- /* The last error that occurred */
+ /* The first error that has occurred.
+ We use the mutex so that only the first error is kept and returned */
Error *error;
+ /* mutex to protect error */
+ QemuMutex error_mutex;
+
/* Do we have to clean up -b/-i from old migrate parameters */
/* This feature is deprecated and will be removed */
bool must_remove_block_options;
@@ -159,6 +166,7 @@ bool migration_has_all_channels(void);
uint64_t migrate_max_downtime(void);
+void migrate_set_error(MigrationState *s, const Error *error);
void migrate_fd_error(MigrationState *s, const Error *error);
void migrate_fd_connect(MigrationState *s);
@@ -177,6 +185,7 @@ bool migrate_zero_blocks(void);
bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
+bool migrate_pause_before_switchover(void);
int migrate_multifd_channels(void);
int migrate_multifd_page_count(void);
diff --git a/migration/page_cache.c b/migration/page_cache.c
index ba984c4858..9a9d13d6a2 100644
--- a/migration/page_cache.c
+++ b/migration/page_cache.c
@@ -14,6 +14,8 @@
#include "qemu/osdep.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/host-utils.h"
#include "migration/page_cache.h"
@@ -39,27 +41,28 @@ struct CacheItem {
struct PageCache {
CacheItem *page_cache;
- unsigned int page_size;
- int64_t max_num_items;
- uint64_t max_item_age;
- int64_t num_items;
+ size_t page_size;
+ size_t max_num_items;
+ size_t num_items;
};
-PageCache *cache_init(int64_t num_pages, unsigned int page_size)
+PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
{
int64_t i;
-
+ size_t num_pages = new_size / page_size;
PageCache *cache;
- if (num_pages <= 0) {
- DPRINTF("invalid number of pages\n");
+ if (new_size < page_size) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
+ "is smaller than one target page size");
return NULL;
}
/* We prefer not to abort if there is no memory */
cache = g_try_malloc(sizeof(*cache));
if (!cache) {
- DPRINTF("Failed to allocate cache\n");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
+ "Failed to allocate cache");
return NULL;
}
/* round down to the nearest power of 2 */
@@ -69,7 +72,6 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size)
}
cache->page_size = page_size;
cache->num_items = 0;
- cache->max_item_age = 0;
cache->max_num_items = num_pages;
DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items);
@@ -78,7 +80,8 @@ PageCache *cache_init(int64_t num_pages, unsigned int page_size)
cache->page_cache = g_try_malloc((cache->max_num_items) *
sizeof(*cache->page_cache));
if (!cache->page_cache) {
- DPRINTF("Failed to allocate cache->page_cache\n");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
+ "Failed to allocate page cache");
g_free(cache);
return NULL;
}
diff --git a/migration/page_cache.h b/migration/page_cache.h
index 4fadd0c501..0cb94498a0 100644
--- a/migration/page_cache.h
+++ b/migration/page_cache.h
@@ -24,12 +24,11 @@ typedef struct PageCache PageCache;
*
* Returns new allocated cache or NULL on error
*
- * @cache pointer to the PageCache struct
- * @num_pages: cache maximal number of cached pages
+ * @cache_size: cache size in bytes
* @page_size: cache page size
+ * @errp: set *errp if the check failed, with reason
*/
-PageCache *cache_init(int64_t num_pages, unsigned int page_size);
-
+PageCache *cache_init(int64_t cache_size, size_t page_size, Error **errp);
/**
* cache_fini: free all cache resources
* @cache pointer to the PageCache struct
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0de68e8b25..bec6c2c66b 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -641,26 +641,46 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
return 0;
}
+static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+ void *from_addr, uint64_t pagesize, RAMBlock *rb)
+{
+ int ret;
+ if (from_addr) {
+ struct uffdio_copy copy_struct;
+ copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
+ copy_struct.src = (uint64_t)(uintptr_t)from_addr;
+ copy_struct.len = pagesize;
+ copy_struct.mode = 0;
+ ret = ioctl(userfault_fd, UFFDIO_COPY, &copy_struct);
+ } else {
+ struct uffdio_zeropage zero_struct;
+ zero_struct.range.start = (uint64_t)(uintptr_t)host_addr;
+ zero_struct.range.len = pagesize;
+ zero_struct.mode = 0;
+ ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
+ }
+ if (!ret) {
+ ramblock_recv_bitmap_set_range(rb, host_addr,
+ pagesize / qemu_target_page_size());
+ }
+ return ret;
+}
+
/*
* Place a host page (from) at (host) atomically
* returns 0 on success
*/
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
- size_t pagesize)
+ RAMBlock *rb)
{
- struct uffdio_copy copy_struct;
-
- copy_struct.dst = (uint64_t)(uintptr_t)host;
- copy_struct.src = (uint64_t)(uintptr_t)from;
- copy_struct.len = pagesize;
- copy_struct.mode = 0;
+ size_t pagesize = qemu_ram_pagesize(rb);
/* copy also acks to the kernel waking the stalled thread up
* TODO: We can inhibit that ack and only do it if it was requested
* which would be slightly cheaper, but we'd have to be careful
* of the order of updating our page state.
*/
- if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+ if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
int e = errno;
error_report("%s: %s copy host: %p from: %p (size: %zd)",
__func__, strerror(e), host, from, pagesize);
@@ -677,17 +697,13 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
* returns 0 on success
*/
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
{
trace_postcopy_place_page_zero(host);
- if (pagesize == getpagesize()) {
- struct uffdio_zeropage zero_struct;
- zero_struct.range.start = (uint64_t)(uintptr_t)host;
- zero_struct.range.len = getpagesize();
- zero_struct.mode = 0;
-
- if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+ if (qemu_ram_pagesize(rb) == getpagesize()) {
+ if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(),
+ rb)) {
int e = errno;
error_report("%s: %s zero host: %p",
__func__, strerror(e), host);
@@ -711,7 +727,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
}
return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
- pagesize);
+ rb);
}
return 0;
@@ -774,14 +790,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
}
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
- size_t pagesize)
+ RAMBlock *rb)
{
assert(0);
return -1;
}
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize)
+ RAMBlock *rb)
{
assert(0);
return -1;
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 587a8b86a7..77ea0fd264 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -72,14 +72,14 @@ void postcopy_discard_send_finish(MigrationState *ms,
* returns 0 on success
*/
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
- size_t pagesize);
+ RAMBlock *rb);
/*
* Place a zero page at (host) atomically
* returns 0 on success
*/
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
- size_t pagesize);
+ RAMBlock *rb);
/* The current postcopy state is read/set by postcopy_state_get/set
* which update it atomically.
diff --git a/migration/ram.c b/migration/ram.c
index b83f8977c5..7f6327f708 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -42,8 +42,10 @@
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
+#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
+#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"
@@ -113,13 +115,24 @@ static void XBZRLE_cache_unlock(void)
* Returns the new_size or negative in case of error.
*
* @new_size: new cache size
+ * @errp: set *errp if the check failed, with reason
*/
-int64_t xbzrle_cache_resize(int64_t new_size)
+int64_t xbzrle_cache_resize(int64_t new_size, Error **errp)
{
PageCache *new_cache;
int64_t ret;
- if (new_size < TARGET_PAGE_SIZE) {
+ /* Check for truncation */
+ if (new_size != (size_t)new_size) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
+ "exceeding address space");
+ return -1;
+ }
+
+ /* Cache should not be larger than guest ram size */
+ if (new_size > ram_bytes_total()) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
+ "exceeds guest ram size");
return -1;
}
@@ -129,10 +142,8 @@ int64_t xbzrle_cache_resize(int64_t new_size)
if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
goto out_new_size;
}
- new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
+ new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
if (!new_cache) {
- error_report("Error creating cache");
ret = -1;
goto out;
}
@@ -148,6 +159,35 @@ out:
return ret;
}
+static void ramblock_recv_map_init(void)
+{
+ RAMBlock *rb;
+
+ RAMBLOCK_FOREACH(rb) {
+ assert(!rb->receivedmap);
+ rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
+ }
+}
+
+int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
+{
+ return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
+ rb->receivedmap);
+}
+
+void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
+{
+ set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
+}
+
+void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
+ size_t nr)
+{
+ bitmap_set_atomic(rb->receivedmap,
+ ramblock_recv_bitmap_offset(host_addr, rb),
+ nr);
+}
+
/*
* An outstanding page request, on the source, having been received
* and queued
@@ -1566,6 +1606,31 @@ static void xbzrle_load_cleanup(void)
XBZRLE.decoded_buf = NULL;
}
+static void ram_state_cleanup(RAMState **rsp)
+{
+ migration_page_queue_free(*rsp);
+ qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
+ qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
+ g_free(*rsp);
+ *rsp = NULL;
+}
+
+static void xbzrle_cleanup(void)
+{
+ XBZRLE_cache_lock();
+ if (XBZRLE.cache) {
+ cache_fini(XBZRLE.cache);
+ g_free(XBZRLE.encoded_buf);
+ g_free(XBZRLE.current_buf);
+ g_free(XBZRLE.zero_target_page);
+ XBZRLE.cache = NULL;
+ XBZRLE.encoded_buf = NULL;
+ XBZRLE.current_buf = NULL;
+ XBZRLE.zero_target_page = NULL;
+ }
+ XBZRLE_cache_unlock();
+}
+
static void ram_save_cleanup(void *opaque)
{
RAMState **rsp = opaque;
@@ -1583,22 +1648,9 @@ static void ram_save_cleanup(void *opaque)
block->unsentmap = NULL;
}
- XBZRLE_cache_lock();
- if (XBZRLE.cache) {
- cache_fini(XBZRLE.cache);
- g_free(XBZRLE.encoded_buf);
- g_free(XBZRLE.current_buf);
- g_free(XBZRLE.zero_target_page);
- XBZRLE.cache = NULL;
- XBZRLE.encoded_buf = NULL;
- XBZRLE.current_buf = NULL;
- XBZRLE.zero_target_page = NULL;
- }
- XBZRLE_cache_unlock();
- migration_page_queue_free(*rsp);
+ xbzrle_cleanup();
compress_threads_save_cleanup();
- g_free(*rsp);
- *rsp = NULL;
+ ram_state_cleanup(rsp);
}
static void ram_state_reset(RAMState *rs)
@@ -1999,6 +2051,8 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
goto err;
}
+ bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
+ length >> qemu_target_page_bits());
ret = ram_block_discard_range(rb, start, length);
err:
@@ -2007,63 +2061,96 @@ err:
return ret;
}
+/*
+ * For every allocation, we try not to crash the VM if the
+ * allocation fails.
+ */
+static int xbzrle_init(void)
+{
+ Error *local_err = NULL;
+
+ if (!migrate_use_xbzrle()) {
+ return 0;
+ }
+
+ XBZRLE_cache_lock();
+
+ XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
+ if (!XBZRLE.zero_target_page) {
+ error_report("%s: Error allocating zero page", __func__);
+ goto err_out;
+ }
+
+ XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
+ TARGET_PAGE_SIZE, &local_err);
+ if (!XBZRLE.cache) {
+ error_report_err(local_err);
+ goto free_zero_page;
+ }
+
+ XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+ if (!XBZRLE.encoded_buf) {
+ error_report("%s: Error allocating encoded_buf", __func__);
+ goto free_cache;
+ }
+
+ XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+ if (!XBZRLE.current_buf) {
+ error_report("%s: Error allocating current_buf", __func__);
+ goto free_encoded_buf;
+ }
+
+ /* We are all good */
+ XBZRLE_cache_unlock();
+ return 0;
+
+free_encoded_buf:
+ g_free(XBZRLE.encoded_buf);
+ XBZRLE.encoded_buf = NULL;
+free_cache:
+ cache_fini(XBZRLE.cache);
+ XBZRLE.cache = NULL;
+free_zero_page:
+ g_free(XBZRLE.zero_target_page);
+ XBZRLE.zero_target_page = NULL;
+err_out:
+ XBZRLE_cache_unlock();
+ return -ENOMEM;
+}
+
static int ram_state_init(RAMState **rsp)
{
- *rsp = g_new0(RAMState, 1);
+ *rsp = g_try_new0(RAMState, 1);
+
+ if (!*rsp) {
+ error_report("%s: Init ramstate fail", __func__);
+ return -1;
+ }
qemu_mutex_init(&(*rsp)->bitmap_mutex);
qemu_mutex_init(&(*rsp)->src_page_req_mutex);
QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
- if (migrate_use_xbzrle()) {
- XBZRLE_cache_lock();
- XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE);
- XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
- TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
- if (!XBZRLE.cache) {
- XBZRLE_cache_unlock();
- error_report("Error creating cache");
- g_free(*rsp);
- *rsp = NULL;
- return -1;
- }
- XBZRLE_cache_unlock();
-
- /* We prefer not to abort if there is no memory */
- XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
- if (!XBZRLE.encoded_buf) {
- error_report("Error allocating encoded_buf");
- g_free(*rsp);
- *rsp = NULL;
- return -1;
- }
+ /*
+ * Count the total number of pages used by ram blocks not including any
+ * gaps due to alignment or unplugs.
+ */
+ (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
- XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
- if (!XBZRLE.current_buf) {
- error_report("Error allocating current_buf");
- g_free(XBZRLE.encoded_buf);
- XBZRLE.encoded_buf = NULL;
- g_free(*rsp);
- *rsp = NULL;
- return -1;
- }
- }
+ ram_state_reset(*rsp);
- /* For memory_global_dirty_log_start below. */
- qemu_mutex_lock_iothread();
+ return 0;
+}
- qemu_mutex_lock_ramlist();
- rcu_read_lock();
- ram_state_reset(*rsp);
+static void ram_list_init_bitmaps(void)
+{
+ RAMBlock *block;
+ unsigned long pages;
/* Skip setting bitmap if there is no RAM */
if (ram_bytes_total()) {
- RAMBlock *block;
-
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
-
+ pages = block->max_length >> TARGET_PAGE_BITS;
block->bmap = bitmap_new(pages);
bitmap_set(block->bmap, 0, pages);
if (migrate_postcopy_ram()) {
@@ -2072,18 +2159,36 @@ static int ram_state_init(RAMState **rsp)
}
}
}
+}
- /*
- * Count the total number of pages used by ram blocks not including any
- * gaps due to alignment or unplugs.
- */
- (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+static void ram_init_bitmaps(RAMState *rs)
+{
+ /* For memory_global_dirty_log_start below. */
+ qemu_mutex_lock_iothread();
+ qemu_mutex_lock_ramlist();
+ rcu_read_lock();
+ ram_list_init_bitmaps();
memory_global_dirty_log_start();
- migration_bitmap_sync(*rsp);
+ migration_bitmap_sync(rs);
+
+ rcu_read_unlock();
qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
- rcu_read_unlock();
+}
+
+static int ram_init_all(RAMState **rsp)
+{
+ if (ram_state_init(rsp)) {
+ return -1;
+ }
+
+ if (xbzrle_init()) {
+ ram_state_cleanup(rsp);
+ return -1;
+ }
+
+ ram_init_bitmaps(*rsp);
return 0;
}
@@ -2110,7 +2215,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
/* migration has already setup the bitmap, reuse it. */
if (!migration_in_colo_state()) {
- if (ram_state_init(rsp) != 0) {
+ if (ram_init_all(rsp) != 0) {
return -1;
}
}
@@ -2534,13 +2639,20 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
{
xbzrle_load_setup();
compress_threads_load_setup();
+ ramblock_recv_map_init();
return 0;
}
static int ram_load_cleanup(void *opaque)
{
+ RAMBlock *rb;
xbzrle_load_cleanup();
compress_threads_load_cleanup();
+
+ RAMBLOCK_FOREACH(rb) {
+ g_free(rb->receivedmap);
+ rb->receivedmap = NULL;
+ }
return 0;
}
@@ -2680,10 +2792,10 @@ static int ram_load_postcopy(QEMUFile *f)
if (all_zero) {
ret = postcopy_place_page_zero(mis, place_dest,
- block->page_size);
+ block);
} else {
ret = postcopy_place_page(mis, place_dest,
- place_source, block->page_size);
+ place_source, block);
}
}
if (!ret) {
@@ -2755,6 +2867,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
break;
}
+ ramblock_recv_bitmap_set(block, host);
trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
}
diff --git a/migration/ram.h b/migration/ram.h
index 4a72d66503..f9f7eef894 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -35,7 +35,7 @@
extern MigrationStats ram_counters;
extern XBZRLECacheStats xbzrle_counters;
-int64_t xbzrle_cache_resize(int64_t new_size);
+int64_t xbzrle_cache_resize(int64_t new_size, Error **errp);
uint64_t ram_bytes_remaining(void);
uint64_t ram_bytes_total(void);
@@ -57,4 +57,9 @@ int ram_discard_range(const char *block_name, uint64_t start, size_t length);
int ram_postcopy_incoming_init(MigrationIncomingState *mis);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
+
+int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr);
+void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr);
+void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr);
+
#endif
diff --git a/migration/tls.c b/migration/tls.c
index 596e8790bd..026a008667 100644
--- a/migration/tls.c
+++ b/migration/tls.c
@@ -119,7 +119,6 @@ static void migration_tls_outgoing_handshake(QIOTask *task,
if (qio_task_propagate_error(task, &err)) {
trace_migration_tls_outgoing_handshake_error(error_get_pretty(err));
migrate_fd_error(s, err);
- error_free(err);
} else {
trace_migration_tls_outgoing_handshake_complete();
migration_channel_connect(s, ioc, NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index f8b365e3f5..6ae866e1aa 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -96,12 +96,18 @@
# @colo: VM is in the process of fault tolerance, VM can not get into this
# state unless colo capability is enabled for migration. (since 2.8)
#
+# @pre-switchover: Paused before device serialisation. (since 2.11)
+#
+# @device: During device serialisation when pause-before-switchover is enabled
+# (since 2.11)
+#
# Since: 2.3
#
##
{ 'enum': 'MigrationStatus',
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
- 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
+ 'active', 'postcopy-active', 'completed', 'failed', 'colo',
+ 'pre-switchover', 'device' ] }
##
# @MigrationInfo:
@@ -341,6 +347,9 @@
# @return-path: If enabled, migration will use the return path even
# for precopy. (since 2.10)
#
+# @pause-before-switchover: Pause outgoing migration before serialising device
+# state and before disabling block IO (since 2.11)
+#
# @x-multifd: Use more than one fd for migration (since 2.11)
#
# Since: 1.2
@@ -348,7 +357,7 @@
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
- 'block', 'return-path', 'x-multifd' ] }
+ 'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
##
# @MigrationCapabilityStatus:
@@ -471,7 +480,7 @@
# number of sockets used for migration. The
# default value is 2 (since 2.11)
#
-# @x-multifd-page-count: Number of pages sent together to a thread
+# @x-multifd-page-count: Number of pages sent together to a thread.
# The default value is 16 (since 2.11)
#
# Since: 2.4
@@ -542,7 +551,7 @@
# number of sockets used for migration. The
# default value is 2 (since 2.11)
#
-# @x-multifd-page-count: Number of pages sent together to a thread
+# @x-multifd-page-count: Number of pages sent together to a thread.
# The default value is 16 (since 2.11)
#
# Since: 2.4
@@ -638,7 +647,7 @@
# number of sockets used for migration.
# The default value is 2 (since 2.11)
#
-# @x-multifd-page-count: Number of pages sent together to a thread
+# @x-multifd-page-count: Number of pages sent together to a thread.
# The default value is 16 (since 2.11)
#
# Since: 2.4
@@ -868,6 +877,23 @@
{ 'command': 'migrate_cancel' }
##
+# @migrate-continue:
+#
+# Continue migration when it's in a paused state.
+#
+# @state: The state the migration is currently expected to be in
+#
+# Returns: nothing on success
+# Since: 2.11
+# Example:
+#
+# -> { "execute": "migrate-continue" , "arguments":
+# { "state": "pre-switchover" } }
+# <- { "return": {} }
+##
+{ 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} }
+
+##
# @migrate_set_downtime:
#
# Set maximum tolerated downtime for migration.