diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | include/migration/migration.h | 2 | ||||
-rw-r--r-- | include/migration/vmstate.h | 100 | ||||
-rw-r--r-- | migration/migration.c | 89 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 4 | ||||
-rw-r--r-- | migration/savevm.c | 34 |
6 files changed, 165 insertions, 65 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ec8a037bc3..caa5260e7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1245,6 +1245,7 @@ F: include/migration/ F: migration/ F: scripts/vmstate-static-checker.py F: tests/vmstate-static-checker-data/ +F: docs/migration.txt Seccomp M: Eduardo Otubo <eduardo.otubo@profitbricks.com> diff --git a/include/migration/migration.h b/include/migration/migration.h index 85b6026d10..ac2c12c2a5 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -104,6 +104,8 @@ struct MigrationIncomingState { QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; + QEMUBH *bh; + int state; /* See savevm.c */ LoadStateEntry_Head loadvm_handlers; diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 7246f29afe..84ee355ceb 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -88,21 +88,101 @@ struct VMStateInfo { }; enum VMStateFlags { + /* Ignored */ VMS_SINGLE = 0x001, + + /* The struct member at opaque + VMStateField.offset is a pointer + * to the actual field (e.g. struct a { uint8_t *b; + * }). Dereference the pointer before using it as basis for + * further pointer arithmetic (see e.g. VMS_ARRAY). Does not + * affect the meaning of VMStateField.num_offset or + * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for + * those. */ VMS_POINTER = 0x002, + + /* The field is an array of fixed size. VMStateField.num contains + * the number of entries in the array. The size of each entry is + * given by VMStateField.size and / or opaque + + * VMStateField.size_offset; see VMS_VBUFFER and + * VMS_MULTIPLY. Each array entry will be processed individually + * (VMStateField.info.get()/put() if VMS_STRUCT is not set, + * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not + * be combined with VMS_VARRAY*. */ VMS_ARRAY = 0x004, + + /* The field is itself a struct, containing one or more + * fields. Recurse into VMStateField.vmsd. Most useful in + * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each + * array entry. */ VMS_STRUCT = 0x008, - VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field*/ - VMS_BUFFER = 0x020, /* static sized buffer */ + + /* The field is an array of variable size. The int32_t at opaque + + * VMStateField.num_offset contains the number of entries in the + * array. See the VMS_ARRAY description regarding array handling + * in general. May not be combined with VMS_ARRAY or any other + * VMS_VARRAY*. */ + VMS_VARRAY_INT32 = 0x010, + + /* Ignored */ + VMS_BUFFER = 0x020, + + /* The field is a (fixed-size or variable-size) array of pointers + * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry + * before using it. Note: Does not imply any one of VMS_ARRAY / + * VMS_VARRAY*; these need to be set explicitly. */ VMS_ARRAY_OF_POINTER = 0x040, - VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */ - VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */ - VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */ - VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/ - VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/ - VMS_MUST_EXIST = 0x1000, /* Field must exist in input */ - VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */ - VMS_MULTIPLY_ELEMENTS = 0x4000, /* multiply varray size by field->num */ + + /* The field is an array of variable size. The uint16_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT16 = 0x080, + + /* The size of the individual entries (a single array entry if + * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if + * neither is set) is variable (i.e. not known at compile-time), + * but the same for all entries. Use the int32_t at opaque + + * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine + * the size of each (and every) entry. */ + VMS_VBUFFER = 0x100, + + /* Multiply the entry size given by the int32_t at opaque + + * VMStateField.size_offset (see VMS_VBUFFER description) with + * VMStateField.size to determine the number of bytes to be + * allocated. Only valid in combination with VMS_VBUFFER. */ + VMS_MULTIPLY = 0x200, + + /* The field is an array of variable size. The uint8_t at opaque + + * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT8 = 0x400, + + /* The field is an array of variable size. The uint32_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT32 = 0x800, + + /* Fail loading the serialised VM state if this field is missing + * from the input. */ + VMS_MUST_EXIST = 0x1000, + + /* When loading serialised VM state, allocate memory for the + * (entire) field. Only valid in combination with + * VMS_POINTER. Note: Not all combinations with other flags are + * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't + * cause the individual entries to be allocated. */ + VMS_ALLOC = 0x2000, + + /* Multiply the number of entries given by the integer at opaque + + * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num + * to determine the number of entries in the array. Only valid in + * combination with one of VMS_VARRAY*. */ + VMS_MULTIPLY_ELEMENTS = 0x4000, }; typedef struct { diff --git a/migration/migration.c b/migration/migration.c index fc5e50b0be..0129d9f420 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -323,10 +323,56 @@ void qemu_start_incoming_migration(const char *uri, Error **errp) } } +static void process_incoming_migration_bh(void *opaque) +{ + Error *local_err = NULL; + MigrationIncomingState *mis = opaque; + + /* Make sure all file formats flush their mutable metadata */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); + error_report_err(local_err); + migrate_decompress_threads_join(); + exit(EXIT_FAILURE); + } + + /* + * This must happen after all error conditions are dealt with and + * we're sure the VM is going to be running on this host. + */ + qemu_announce_self(); + + /* If global state section was not received or we are in running + state, we need to obey autostart. Any other state is set with + runstate_set. */ + + if (!global_state_received() || + global_state_get_runstate() == RUN_STATE_RUNNING) { + if (autostart) { + vm_start(); + } else { + runstate_set(RUN_STATE_PAUSED); + } + } else { + runstate_set(global_state_get_runstate()); + } + migrate_decompress_threads_join(); + /* + * This must happen after any state changes since as soon as an external + * observer sees this event they might start to prod at the VM assuming + * it's ready to use. + */ + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COMPLETED); + qemu_bh_delete(mis->bh); + migration_incoming_state_destroy(); +} + static void process_incoming_migration_co(void *opaque) { QEMUFile *f = opaque; - Error *local_err = NULL; MigrationIncomingState *mis; PostcopyState ps; int ret; @@ -369,45 +415,8 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } - /* Make sure all file formats flush their mutable metadata */ - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); - error_report_err(local_err); - migrate_decompress_threads_join(); - exit(EXIT_FAILURE); - } - - /* - * This must happen after all error conditions are dealt with and - * we're sure the VM is going to be running on this host. - */ - qemu_announce_self(); - - /* If global state section was not received or we are in running - state, we need to obey autostart. Any other state is set with - runstate_set. */ - - if (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING) { - if (autostart) { - vm_start(); - } else { - runstate_set(RUN_STATE_PAUSED); - } - } else { - runstate_set(global_state_get_runstate()); - } - migrate_decompress_threads_join(); - /* - * This must happen after any state changes since as soon as an external - * observer sees this event they might start to prod at the VM assuming - * it's ready to use. - */ - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COMPLETED); - migration_incoming_state_destroy(); + mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); + qemu_bh_schedule(mis->bh); } void process_incoming_migration(QEMUFile *f) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 254c629d48..fbd0064fce 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -52,14 +52,14 @@ struct PostcopyDiscardState { #if defined(__linux__) #include <poll.h> -#include <sys/eventfd.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> #include <asm/types.h> /* for __u64 */ #endif -#if defined(__linux__) && defined(__NR_userfaultfd) +#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) +#include <sys/eventfd.h> #include <linux/userfaultfd.h> static bool ufd_version_check(int ufd) diff --git a/migration/savevm.c b/migration/savevm.c index 94f2894243..b45915612f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1088,12 +1088,11 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) json_prop_int(vmdesc, "instance_id", se->instance_id); save_section_header(f, se, QEMU_VM_SECTION_FULL); - vmstate_save(f, se, vmdesc); - - json_end_object(vmdesc); trace_savevm_section_end(se->idstr, se->section_id, 0); save_section_footer(f, se); + + json_end_object(vmdesc); } if (!in_postcopy) { @@ -1496,17 +1495,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) return 0; } -/* After all discards we can start running and asking for pages */ -static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +static void loadvm_postcopy_handle_run_bh(void *opaque) { - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); Error *local_err = NULL; - - trace_loadvm_postcopy_handle_run(); - if (ps != POSTCOPY_INCOMING_LISTENING) { - error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); - return -1; - } + MigrationIncomingState *mis = opaque; /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c @@ -1519,7 +1511,6 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) bdrv_invalidate_cache_all(&local_err); if (local_err) { error_report_err(local_err); - return -1; } trace_loadvm_postcopy_handle_run_cpu_sync(); @@ -1535,6 +1526,23 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) runstate_set(RUN_STATE_PAUSED); } + qemu_bh_delete(mis->bh); +} + +/* After all discards we can start running and asking for pages */ +static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) +{ + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + + trace_loadvm_postcopy_handle_run(); + if (ps != POSTCOPY_INCOMING_LISTENING) { + error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); + return -1; + } + + mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL); + qemu_bh_schedule(mis->bh); + /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the * package (since it's now being read by the listener thread). |