aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--include/migration/migration.h2
-rw-r--r--include/migration/vmstate.h100
-rw-r--r--migration/migration.c89
-rw-r--r--migration/postcopy-ram.c4
-rw-r--r--migration/savevm.c34
6 files changed, 165 insertions, 65 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index ec8a037bc3..caa5260e7c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1245,6 +1245,7 @@ F: include/migration/
F: migration/
F: scripts/vmstate-static-checker.py
F: tests/vmstate-static-checker-data/
+F: docs/migration.txt
Seccomp
M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 85b6026d10..ac2c12c2a5 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -104,6 +104,8 @@ struct MigrationIncomingState {
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
+ QEMUBH *bh;
+
int state;
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7246f29afe..84ee355ceb 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -88,21 +88,101 @@ struct VMStateInfo {
};
enum VMStateFlags {
+ /* Ignored */
VMS_SINGLE = 0x001,
+
+ /* The struct member at opaque + VMStateField.offset is a pointer
+ * to the actual field (e.g. struct a { uint8_t *b;
+ * }). Dereference the pointer before using it as basis for
+ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not
+ * affect the meaning of VMStateField.num_offset or
+ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for
+ * those. */
VMS_POINTER = 0x002,
+
+ /* The field is an array of fixed size. VMStateField.num contains
+ * the number of entries in the array. The size of each entry is
+ * given by VMStateField.size and / or opaque +
+ * VMStateField.size_offset; see VMS_VBUFFER and
+ * VMS_MULTIPLY. Each array entry will be processed individually
+ * (VMStateField.info.get()/put() if VMS_STRUCT is not set,
+ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not
+ * be combined with VMS_VARRAY*. */
VMS_ARRAY = 0x004,
+
+ /* The field is itself a struct, containing one or more
+ * fields. Recurse into VMStateField.vmsd. Most useful in
+ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each
+ * array entry. */
VMS_STRUCT = 0x008,
- VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field*/
- VMS_BUFFER = 0x020, /* static sized buffer */
+
+ /* The field is an array of variable size. The int32_t at opaque +
+ * VMStateField.num_offset contains the number of entries in the
+ * array. See the VMS_ARRAY description regarding array handling
+ * in general. May not be combined with VMS_ARRAY or any other
+ * VMS_VARRAY*. */
+ VMS_VARRAY_INT32 = 0x010,
+
+ /* Ignored */
+ VMS_BUFFER = 0x020,
+
+ /* The field is a (fixed-size or variable-size) array of pointers
+ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry
+ * before using it. Note: Does not imply any one of VMS_ARRAY /
+ * VMS_VARRAY*; these need to be set explicitly. */
VMS_ARRAY_OF_POINTER = 0x040,
- VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */
- VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */
- VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */
- VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
- VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
- VMS_MUST_EXIST = 0x1000, /* Field must exist in input */
- VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */
- VMS_MULTIPLY_ELEMENTS = 0x4000, /* multiply varray size by field->num */
+
+ /* The field is an array of variable size. The uint16_t at opaque
+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS)
+ * contains the number of entries in the array. See the VMS_ARRAY
+ * description regarding array handling in general. May not be
+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */
+ VMS_VARRAY_UINT16 = 0x080,
+
+ /* The size of the individual entries (a single array entry if
+ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if
+ * neither is set) is variable (i.e. not known at compile-time),
+ * but the same for all entries. Use the int32_t at opaque +
+ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine
+ * the size of each (and every) entry. */
+ VMS_VBUFFER = 0x100,
+
+ /* Multiply the entry size given by the int32_t at opaque +
+ * VMStateField.size_offset (see VMS_VBUFFER description) with
+ * VMStateField.size to determine the number of bytes to be
+ * allocated. Only valid in combination with VMS_VBUFFER. */
+ VMS_MULTIPLY = 0x200,
+
+ /* The field is an array of variable size. The uint8_t at opaque +
+ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS)
+ * contains the number of entries in the array. See the VMS_ARRAY
+ * description regarding array handling in general. May not be
+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */
+ VMS_VARRAY_UINT8 = 0x400,
+
+ /* The field is an array of variable size. The uint32_t at opaque
+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS)
+ * contains the number of entries in the array. See the VMS_ARRAY
+ * description regarding array handling in general. May not be
+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */
+ VMS_VARRAY_UINT32 = 0x800,
+
+ /* Fail loading the serialised VM state if this field is missing
+ * from the input. */
+ VMS_MUST_EXIST = 0x1000,
+
+ /* When loading serialised VM state, allocate memory for the
+ * (entire) field. Only valid in combination with
+ * VMS_POINTER. Note: Not all combinations with other flags are
+ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't
+ * cause the individual entries to be allocated. */
+ VMS_ALLOC = 0x2000,
+
+ /* Multiply the number of entries given by the integer at opaque +
+ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num
+ * to determine the number of entries in the array. Only valid in
+ * combination with one of VMS_VARRAY*. */
+ VMS_MULTIPLY_ELEMENTS = 0x4000,
};
typedef struct {
diff --git a/migration/migration.c b/migration/migration.c
index fc5e50b0be..0129d9f420 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -323,10 +323,56 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
}
}
+static void process_incoming_migration_bh(void *opaque)
+{
+ Error *local_err = NULL;
+ MigrationIncomingState *mis = opaque;
+
+ /* Make sure all file formats flush their mutable metadata */
+ bdrv_invalidate_cache_all(&local_err);
+ if (local_err) {
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ error_report_err(local_err);
+ migrate_decompress_threads_join();
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * This must happen after all error conditions are dealt with and
+ * we're sure the VM is going to be running on this host.
+ */
+ qemu_announce_self();
+
+ /* If global state section was not received or we are in running
+ state, we need to obey autostart. Any other state is set with
+ runstate_set. */
+
+ if (!global_state_received() ||
+ global_state_get_runstate() == RUN_STATE_RUNNING) {
+ if (autostart) {
+ vm_start();
+ } else {
+ runstate_set(RUN_STATE_PAUSED);
+ }
+ } else {
+ runstate_set(global_state_get_runstate());
+ }
+ migrate_decompress_threads_join();
+ /*
+ * This must happen after any state changes since as soon as an external
+ * observer sees this event they might start to prod at the VM assuming
+ * it's ready to use.
+ */
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_COMPLETED);
+ qemu_bh_delete(mis->bh);
+ migration_incoming_state_destroy();
+}
+
static void process_incoming_migration_co(void *opaque)
{
QEMUFile *f = opaque;
- Error *local_err = NULL;
MigrationIncomingState *mis;
PostcopyState ps;
int ret;
@@ -369,45 +415,8 @@ static void process_incoming_migration_co(void *opaque)
exit(EXIT_FAILURE);
}
- /* Make sure all file formats flush their mutable metadata */
- bdrv_invalidate_cache_all(&local_err);
- if (local_err) {
- migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_FAILED);
- error_report_err(local_err);
- migrate_decompress_threads_join();
- exit(EXIT_FAILURE);
- }
-
- /*
- * This must happen after all error conditions are dealt with and
- * we're sure the VM is going to be running on this host.
- */
- qemu_announce_self();
-
- /* If global state section was not received or we are in running
- state, we need to obey autostart. Any other state is set with
- runstate_set. */
-
- if (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING) {
- if (autostart) {
- vm_start();
- } else {
- runstate_set(RUN_STATE_PAUSED);
- }
- } else {
- runstate_set(global_state_get_runstate());
- }
- migrate_decompress_threads_join();
- /*
- * This must happen after any state changes since as soon as an external
- * observer sees this event they might start to prod at the VM assuming
- * it's ready to use.
- */
- migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_COMPLETED);
- migration_incoming_state_destroy();
+ mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
+ qemu_bh_schedule(mis->bh);
}
void process_incoming_migration(QEMUFile *f)
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 254c629d48..fbd0064fce 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -52,14 +52,14 @@ struct PostcopyDiscardState {
#if defined(__linux__)
#include <poll.h>
-#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/types.h> /* for __u64 */
#endif
-#if defined(__linux__) && defined(__NR_userfaultfd)
+#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
+#include <sys/eventfd.h>
#include <linux/userfaultfd.h>
static bool ufd_version_check(int ufd)
diff --git a/migration/savevm.c b/migration/savevm.c
index 94f2894243..b45915612f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1088,12 +1088,11 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
json_prop_int(vmdesc, "instance_id", se->instance_id);
save_section_header(f, se, QEMU_VM_SECTION_FULL);
-
vmstate_save(f, se, vmdesc);
-
- json_end_object(vmdesc);
trace_savevm_section_end(se->idstr, se->section_id, 0);
save_section_footer(f, se);
+
+ json_end_object(vmdesc);
}
if (!in_postcopy) {
@@ -1496,17 +1495,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
return 0;
}
-/* After all discards we can start running and asking for pages */
-static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
+static void loadvm_postcopy_handle_run_bh(void *opaque)
{
- PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
Error *local_err = NULL;
-
- trace_loadvm_postcopy_handle_run();
- if (ps != POSTCOPY_INCOMING_LISTENING) {
- error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
- return -1;
- }
+ MigrationIncomingState *mis = opaque;
/* TODO we should move all of this lot into postcopy_ram.c or a shared code
* in migration.c
@@ -1519,7 +1511,6 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
bdrv_invalidate_cache_all(&local_err);
if (local_err) {
error_report_err(local_err);
- return -1;
}
trace_loadvm_postcopy_handle_run_cpu_sync();
@@ -1535,6 +1526,23 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
runstate_set(RUN_STATE_PAUSED);
}
+ qemu_bh_delete(mis->bh);
+}
+
+/* After all discards we can start running and asking for pages */
+static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
+{
+ PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+
+ trace_loadvm_postcopy_handle_run();
+ if (ps != POSTCOPY_INCOMING_LISTENING) {
+ error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
+ return -1;
+ }
+
+ mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL);
+ qemu_bh_schedule(mis->bh);
+
/* We need to finish reading the stream from the package
* and also stop reading anything more from the stream that loaded the
* package (since it's now being read by the listener thread).