diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-09-22 14:04:10 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-09-22 14:04:10 +0100 |
commit | bef81b3eb521f2825d85b8ddf246c81a28dcef35 (patch) | |
tree | 1d75795c73f05daa4d58fa12b6b81ccdba530754 | |
parent | 159a9df02127cbf6fe640f67954794a99a12a005 (diff) | |
parent | 54ae0886b12c4934e84381af777d4df6147cc2ec (diff) |
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170922-1' into staging
migration/next for 20170922
# gpg: Signature made Fri 22 Sep 2017 13:15:06 BST
# gpg: using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg: aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723
* remotes/juanquintela/tags/migration/20170922-1:
migration: split ufd_version_check onto receive/request features part
migration: fix hardcoded function name in error report
migration: pass MigrationIncomingState* into migration check functions
migration: split common postcopy out of ram postcopy
migration: fix ram_save_pending
migration: add has_postcopy savevm handler
bitmap: provide to_le/from_le helpers
bitmap: introduce bitmap_count_one()
bitmap: remove BITOP_WORD()
migration: Split migration_fd_process_incoming
migration: Create multifd migration threads
migration: Create x-multifd-page-count parameter
migration: Create x-multifd-channels parameter
migration: Add multifd capability
migration: Create migration_has_all_channels
migration: Add comments to channel functions
migration: Teach it about G_SOURCE_REMOVE
migration: Create migration_ioc_process_incoming()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hmp.c | 14 | ||||
-rw-r--r-- | include/glib-compat.h | 2 | ||||
-rw-r--r-- | include/migration/register.h | 1 | ||||
-rw-r--r-- | include/qemu/bitmap.h | 17 | ||||
-rw-r--r-- | migration/channel.c | 18 | ||||
-rw-r--r-- | migration/exec.c | 2 | ||||
-rw-r--r-- | migration/fd.c | 2 | ||||
-rw-r--r-- | migration/migration.c | 176 | ||||
-rw-r--r-- | migration/migration.h | 9 | ||||
-rw-r--r-- | migration/postcopy-ram.c | 100 | ||||
-rw-r--r-- | migration/postcopy-ram.h | 2 | ||||
-rw-r--r-- | migration/ram.c | 216 | ||||
-rw-r--r-- | migration/ram.h | 5 | ||||
-rw-r--r-- | migration/savevm.c | 56 | ||||
-rw-r--r-- | migration/socket.c | 10 | ||||
-rw-r--r-- | qapi/migration.json | 39 | ||||
-rw-r--r-- | util/bitmap.c | 47 | ||||
-rw-r--r-- | util/bitops.c | 6 |
18 files changed, 664 insertions, 58 deletions
@@ -336,6 +336,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %s\n", MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL), params->block_incremental ? "on" : "off"); + monitor_printf(mon, "%s: %" PRId64 "\n", + MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_CHANNELS), + params->x_multifd_channels); + monitor_printf(mon, "%s: %" PRId64 "\n", + MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT), + params->x_multifd_page_count); } qapi_free_MigrationParameters(params); @@ -1621,6 +1627,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_block_incremental = true; visit_type_bool(v, param, &p->block_incremental, &err); break; + case MIGRATION_PARAMETER_X_MULTIFD_CHANNELS: + p->has_x_multifd_channels = true; + visit_type_int(v, param, &p->x_multifd_channels, &err); + break; + case MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT: + p->has_x_multifd_page_count = true; + visit_type_int(v, param, &p->x_multifd_page_count, &err); + break; default: assert(0); } diff --git a/include/glib-compat.h b/include/glib-compat.h index fcffcd3f07..e15aca2d40 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -223,6 +223,8 @@ static inline gboolean g_hash_table_contains(GHashTable *hash_table, { return g_hash_table_lookup_extended(hash_table, key, NULL, NULL); } +#define G_SOURCE_CONTINUE TRUE +#define G_SOURCE_REMOVE FALSE #endif #ifndef g_assert_true diff --git a/include/migration/register.h b/include/migration/register.h index a0f1edd8c7..f4f7bdc177 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -24,6 +24,7 @@ typedef struct SaveVMHandlers { /* This runs both outside and inside the iothread lock. */ bool (*is_active)(void *opaque); + bool (*has_postcopy)(void *opaque); /* This runs outside the iothread lock in the migration case, and * within the lock in the savevm case. The callback had better only diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index c318da12d7..509eeddece 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -39,6 +39,8 @@ * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area + * bitmap_to_le(dst, src, nbits) Convert bitmap to little endian + * bitmap_from_le(dst, src, nbits) Convert bitmap from little endian */ /* @@ -82,6 +84,7 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); int slow_bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits); static inline unsigned long *bitmap_try_new(long nbits) { @@ -216,6 +219,15 @@ static inline int bitmap_intersects(const unsigned long *src1, } } +static inline long bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + if (small_nbits(nbits)) { + return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_count_one(bitmap, nbits); + } +} + void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); @@ -237,4 +249,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old, return new; } +void bitmap_to_le(unsigned long *dst, const unsigned long *src, + long nbits); +void bitmap_from_le(unsigned long *dst, const unsigned long *src, + long nbits); + #endif /* BITMAP_H */ diff --git a/migration/channel.c b/migration/channel.c index 3b7252f5a2..70ec7ea3b7 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -19,6 +19,14 @@ #include "qapi/error.h" #include "io/channel-tls.h" +/** + * @migration_channel_process_incoming - Create new incoming migration channel + * + * Notice that TLS is special. For it we listen in a listener socket, + * and then create a new client socket from the TLS library. + * + * @ioc: Channel to which we are connecting + */ void migration_channel_process_incoming(QIOChannel *ioc) { MigrationState *s = migrate_get_current(); @@ -36,12 +44,18 @@ void migration_channel_process_incoming(QIOChannel *ioc) error_report_err(local_err); } } else { - QEMUFile *f = qemu_fopen_channel_input(ioc); - migration_fd_process_incoming(f); + migration_ioc_process_incoming(ioc); } } +/** + * @migration_channel_connect - Create new outgoing migration channel + * + * @s: Current migration state + * @ioc: Channel to which we are connecting + * @hostname: Where we want to connect + */ void migration_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname) diff --git a/migration/exec.c b/migration/exec.c index 08b599e0e2..f3be1baf2e 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -49,7 +49,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return G_SOURCE_REMOVE; } void exec_start_incoming_migration(const char *command, Error **errp) diff --git a/migration/fd.c b/migration/fd.c index 30f5258a6a..30de4b9847 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return G_SOURCE_REMOVE; } void fd_start_incoming_migration(const char *infd, Error **errp) diff --git a/migration/migration.c b/migration/migration.c index 959e8ec88e..98429dc843 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -77,6 +77,8 @@ * Note: Please change this default value to 10000 when we support hybrid mode. */ #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 +#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -279,6 +281,10 @@ static void process_incoming_migration_bh(void *opaque) */ qemu_announce_self(); + if (multifd_load_cleanup(&local_err) != 0) { + error_report_err(local_err); + autostart = false; + } /* If global state section was not received or we are in running state, we need to obey autostart. Any other state is set with runstate_set. */ @@ -306,17 +312,16 @@ static void process_incoming_migration_bh(void *opaque) static void process_incoming_migration_co(void *opaque) { - QEMUFile *f = opaque; MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyState ps; int ret; - mis->from_src_file = f; + assert(mis->from_src_file); mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); - ret = qemu_loadvm_state(f); + ret = qemu_loadvm_state(mis->from_src_file); ps = postcopy_state_get(); trace_process_incoming_migration_co_end(ret, ps); @@ -352,24 +357,71 @@ static void process_incoming_migration_co(void *opaque) } if (ret < 0) { + Error *local_err = NULL; + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); error_report("load of migration failed: %s", strerror(-ret)); qemu_fclose(mis->from_src_file); + if (multifd_load_cleanup(&local_err) != 0) { + error_report_err(local_err); + } exit(EXIT_FAILURE); } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); qemu_bh_schedule(mis->bh); } -void migration_fd_process_incoming(QEMUFile *f) +static void migration_incoming_setup(QEMUFile *f) { - Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f); + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (multifd_load_setup() != 0) { + /* We haven't been able to create multifd threads + nothing better to do */ + exit(EXIT_FAILURE); + } + if (!mis->from_src_file) { + mis->from_src_file = f; + } qemu_file_set_blocking(f, false); +} + +static void migration_incoming_process(void) +{ + Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); qemu_coroutine_enter(co); } +void migration_fd_process_incoming(QEMUFile *f) +{ + migration_incoming_setup(f); + migration_incoming_process(); +} + +void migration_ioc_process_incoming(QIOChannel *ioc) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!mis->from_src_file) { + QEMUFile *f = qemu_fopen_channel_input(ioc); + migration_fd_process_incoming(f); + } + /* We still only have a single channel. Nothing to do here yet */ +} + +/** + * @migration_has_all_channels: We have received all channels that we need + * + * Returns true when we have got connections to all the channels that + * we need for migration. + */ +bool migration_has_all_channels(void) +{ + return true; +} + /* * Send a 'SHUT' message on the return channel with the given value * to indicate that we've finished with the RP. Non-0 value indicates @@ -456,6 +508,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; params->has_block_incremental = true; params->block_incremental = s->parameters.block_incremental; + params->has_x_multifd_channels = true; + params->x_multifd_channels = s->parameters.x_multifd_channels; + params->has_x_multifd_page_count = true; + params->x_multifd_page_count = s->parameters.x_multifd_page_count; return params; } @@ -603,6 +659,7 @@ static bool migrate_caps_check(bool *cap_list, { MigrationCapabilityStatusList *cap; bool old_postcopy_cap; + MigrationIncomingState *mis = migration_incoming_get_current(); old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; @@ -636,7 +693,7 @@ static bool migrate_caps_check(bool *cap_list, * special support. */ if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host()) { + !postcopy_ram_supported_by_host(mis)) { /* postcopy_ram_supported_by_host will have emitted a more * detailed message */ @@ -737,6 +794,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) "is invalid, it should be positive"); return false; } + if (params->has_x_multifd_channels && + (params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "multifd_channels", + "is invalid, it should be in the range of 1 to 255"); + return false; + } + if (params->has_x_multifd_page_count && + (params->x_multifd_page_count < 1 || + params->x_multifd_page_count > 10000)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "multifd_page_count", + "is invalid, it should be in the range of 1 to 10000"); + return false; + } return true; } @@ -855,6 +927,12 @@ static void migrate_params_apply(MigrateSetParameters *params) if (params->has_block_incremental) { s->parameters.block_incremental = params->block_incremental; } + if (params->has_x_multifd_channels) { + s->parameters.x_multifd_channels = params->x_multifd_channels; + } + if (params->has_x_multifd_page_count) { + s->parameters.x_multifd_page_count = params->x_multifd_page_count; + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) @@ -968,6 +1046,8 @@ static void migrate_fd_cleanup(void *opaque) s->cleanup_bh = NULL; if (s->to_dst_file) { + Error *local_err = NULL; + trace_migrate_fd_cleanup(); qemu_mutex_unlock_iothread(); if (s->migration_thread_running) { @@ -976,6 +1056,9 @@ static void migrate_fd_cleanup(void *opaque) } qemu_mutex_lock_iothread(); + if (multifd_save_cleanup(&local_err) != 0) { + error_report_err(local_err); + } qemu_fclose(s->to_dst_file); s->to_dst_file = NULL; } @@ -1361,6 +1444,11 @@ bool migrate_postcopy_ram(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } +bool migrate_postcopy(void) +{ + return migrate_postcopy_ram(); +} + bool migrate_auto_converge(void) { MigrationState *s; @@ -1424,6 +1512,33 @@ bool migrate_use_events(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; } +bool migrate_use_multifd(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; +} + +int migrate_multifd_channels(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.x_multifd_channels; +} + +int migrate_multifd_page_count(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.x_multifd_page_count; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -1717,9 +1832,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) * need to tell the destination to throw any pages it's already received * that are dirty */ - if (ram_postcopy_send_discard_bitmap(ms)) { - error_report("postcopy send discard bitmap failed"); - goto fail; + if (migrate_postcopy_ram()) { + if (ram_postcopy_send_discard_bitmap(ms)) { + error_report("postcopy send discard bitmap failed"); + goto fail; + } } /* @@ -1728,8 +1845,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) * wrap their state up here */ qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); - /* Ping just for debugging, helps line traces up */ - qemu_savevm_send_ping(ms->to_dst_file, 2); + if (migrate_postcopy_ram()) { + /* Ping just for debugging, helps line traces up */ + qemu_savevm_send_ping(ms->to_dst_file, 2); + } /* * While loading the device state we may trigger page transfer @@ -1754,7 +1873,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) qemu_savevm_send_postcopy_listen(fb); qemu_savevm_state_complete_precopy(fb, false, false); - qemu_savevm_send_ping(fb, 3); + if (migrate_postcopy_ram()) { + qemu_savevm_send_ping(fb, 3); + } qemu_savevm_send_postcopy_run(fb); @@ -1789,11 +1910,13 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) qemu_mutex_unlock_iothread(); - /* - * Although this ping is just for debug, it could potentially be - * used for getting a better measurement of downtime at the source. - */ - qemu_savevm_send_ping(ms->to_dst_file, 4); + if (migrate_postcopy_ram()) { + /* + * Although this ping is just for debug, it could potentially be + * used for getting a better measurement of downtime at the source. + */ + qemu_savevm_send_ping(ms->to_dst_file, 4); + } if (migrate_release_ram()) { ram_postcopy_migrated_memory_release(ms); @@ -1971,7 +2094,7 @@ static void *migration_thread(void *opaque) qemu_savevm_send_ping(s->to_dst_file, 1); } - if (migrate_postcopy_ram()) { + if (migrate_postcopy()) { /* * Tell the destination that we *might* want to do postcopy later; * if the other end can't do postcopy it should fail now, nice and @@ -2004,7 +2127,7 @@ static void *migration_thread(void *opaque) if (pending_size && pending_size >= threshold_size) { /* Still a significant amount to transfer */ - if (migrate_postcopy_ram() && + if (migrate_postcopy() && s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && pend_nonpost <= threshold_size && atomic_read(&s->start_postcopy)) { @@ -2138,6 +2261,12 @@ void migrate_fd_connect(MigrationState *s) } } + if (multifd_save_setup() != 0) { + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, + MIGRATION_STATUS_FAILED); + migrate_fd_cleanup(s); + return; + } qemu_thread_create(&s->thread, "live_migration", migration_thread, s, QEMU_THREAD_JOINABLE); s->migration_thread_running = true; @@ -2189,6 +2318,12 @@ static Property migration_properties[] = { DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState, parameters.x_checkpoint_delay, DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), + DEFINE_PROP_INT64("x-multifd-channels", MigrationState, + parameters.x_multifd_channels, + DEFAULT_MIGRATE_MULTIFD_CHANNELS), + DEFINE_PROP_INT64("x-multifd-page-count", MigrationState, + parameters.x_multifd_page_count, + DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -2202,6 +2337,7 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD), DEFINE_PROP_END_OF_LIST(), }; @@ -2245,6 +2381,8 @@ static void migration_instance_init(Object *obj) params->has_downtime_limit = true; params->has_x_checkpoint_delay = true; params->has_block_incremental = true; + params->has_x_multifd_channels = true; + params->has_x_multifd_page_count = true; } /* diff --git a/migration/migration.h b/migration/migration.h index 148c9facbc..b83cceadc4 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -20,6 +20,7 @@ #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" #include "hw/qdev.h" +#include "io/channel.h" /* State for the incoming migration */ struct MigrationIncomingState { @@ -152,6 +153,9 @@ struct MigrationState void migrate_set_state(int *state, int old_state, int new_state); void migration_fd_process_incoming(QEMUFile *f); +void migration_ioc_process_incoming(QIOChannel *ioc); + +bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); @@ -165,11 +169,16 @@ bool migration_is_blocked(Error **errp); bool migration_in_postcopy(void); MigrationState *migrate_get_current(void); +bool migrate_postcopy(void); + bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); bool migrate_auto_converge(void); +bool migrate_use_multifd(void); +int migrate_multifd_channels(void); +int migrate_multifd_page_count(void); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7e21e6fd36..0de68e8b25 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,15 +61,66 @@ struct PostcopyDiscardState { #include <sys/eventfd.h> #include <linux/userfaultfd.h> -static bool ufd_version_check(int ufd) + +/** + * receive_ufd_features: check userfault fd features, to request only supported + * features in the future. + * + * Returns: true on success + * + * __NR_userfaultfd - should be checked before + * @features: out parameter will contain uffdio_api.features provided by kernel + * in case of success + */ +static bool receive_ufd_features(uint64_t *features) { - struct uffdio_api api_struct; - uint64_t ioctl_mask; + struct uffdio_api api_struct = {0}; + int ufd; + bool ret = true; + + /* if we are here __NR_userfaultfd should exists */ + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + if (ufd == -1) { + error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, + strerror(errno)); + return false; + } + /* ask features */ api_struct.api = UFFD_API; api_struct.features = 0; if (ioctl(ufd, UFFDIO_API, &api_struct)) { - error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s", + error_report("%s: UFFDIO_API failed: %s", __func__, + strerror(errno)); + ret = false; + goto release_ufd; + } + + *features = api_struct.features; + +release_ufd: + close(ufd); + return ret; +} + +/** + * request_ufd_features: this function should be called only once on a newly + * opened ufd, subsequent calls will lead to error. + * + * Returns: true on succes + * + * @ufd: fd obtained from userfaultfd syscall + * @features: bit mask see UFFD_API_FEATURES + */ +static bool request_ufd_features(int ufd, uint64_t features) +{ + struct uffdio_api api_struct = {0}; + uint64_t ioctl_mask; + + api_struct.api = UFFD_API; + api_struct.features = features; + if (ioctl(ufd, UFFDIO_API, &api_struct)) { + error_report("%s failed: UFFDIO_API failed: %s", __func__, strerror(errno)); return false; } @@ -82,11 +133,42 @@ static bool ufd_version_check(int ufd) return false; } + return true; +} + +static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) +{ + uint64_t asked_features = 0; + static uint64_t supported_features; + + /* + * it's not possible to + * request UFFD_API twice per one fd + * userfault fd features is persistent + */ + if (!supported_features) { + if (!receive_ufd_features(&supported_features)) { + error_report("%s failed", __func__); + return false; + } + } + + /* + * request features, even if asked_features is 0, due to + * kernel expects UFFD_API before UFFDIO_REGISTER, per + * userfault file descriptor + */ + if (!request_ufd_features(ufd, asked_features)) { + error_report("%s failed: features %" PRIu64, __func__, + asked_features); + return false; + } + if (getpagesize() != ram_pagesize_summary()) { bool have_hp = false; /* We've got a huge page */ #ifdef UFFD_FEATURE_MISSING_HUGETLBFS - have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; + have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; #endif if (!have_hp) { error_report("Userfault on this host does not support huge pages"); @@ -124,7 +206,7 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr, * normally fine since if the postcopy succeeds it gets turned back on at the * end. */ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { long pagesize = getpagesize(); int ufd = -1; @@ -147,7 +229,7 @@ bool postcopy_ram_supported_by_host(void) } /* Version and features check */ - if (!ufd_version_check(ufd)) { + if (!ufd_check_and_apply(ufd, mis)) { goto out; } @@ -523,7 +605,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Although the host check already tested the API, we need to * do the check again as an ABI handshake on the new fd. */ - if (!ufd_version_check(mis->userfault_fd)) { + if (!ufd_check_and_apply(mis->userfault_fd, mis)) { return -1; } @@ -661,7 +743,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis) #else /* No target OS support, stubs just fail */ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { error_report("%s: No OS support", __func__); return false; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8007..587a8b86a7 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -14,7 +14,7 @@ #define QEMU_POSTCOPY_RAM_H /* Return true if the host supports everything we need to do postcopy-ram */ -bool postcopy_ram_supported_by_host(void); +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); /* * Make all of RAM sensitive to accesses to areas that haven't yet been written diff --git a/migration/ram.c b/migration/ram.c index e18b3e2d4f..88ca69e7b2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -356,6 +356,208 @@ static void compress_threads_save_setup(void) } } +/* Multiple fd's */ + +struct MultiFDSendParams { + uint8_t id; + char *name; + QemuThread thread; + QemuSemaphore sem; + QemuMutex mutex; + bool quit; +}; +typedef struct MultiFDSendParams MultiFDSendParams; + +struct { + MultiFDSendParams *params; + /* number of created threads */ + int count; +} *multifd_send_state; + +static void terminate_multifd_send_threads(Error *errp) +{ + int i; + + for (i = 0; i < multifd_send_state->count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_mutex_lock(&p->mutex); + p->quit = true; + qemu_sem_post(&p->sem); + qemu_mutex_unlock(&p->mutex); + } +} + +int multifd_save_cleanup(Error **errp) +{ + int i; + int ret = 0; + + if (!migrate_use_multifd()) { + return 0; + } + terminate_multifd_send_threads(NULL); + for (i = 0; i < multifd_send_state->count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_thread_join(&p->thread); + qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + g_free(p->name); + p->name = NULL; + } + g_free(multifd_send_state->params); + multifd_send_state->params = NULL; + g_free(multifd_send_state); + multifd_send_state = NULL; + return ret; +} + +static void *multifd_send_thread(void *opaque) +{ + MultiFDSendParams *p = opaque; + + while (true) { + qemu_mutex_lock(&p->mutex); + if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; + } + qemu_mutex_unlock(&p->mutex); + qemu_sem_wait(&p->sem); + } + + return NULL; +} + +int multifd_save_setup(void) +{ + int thread_count; + uint8_t i; + + if (!migrate_use_multifd()) { + return 0; + } + thread_count = migrate_multifd_channels(); + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->count = 0; + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + p->quit = false; + p->id = i; + p->name = g_strdup_printf("multifdsend_%d", i); + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + + multifd_send_state->count++; + } + return 0; +} + +struct MultiFDRecvParams { + uint8_t id; + char *name; + QemuThread thread; + QemuSemaphore sem; + QemuMutex mutex; + bool quit; +}; +typedef struct MultiFDRecvParams MultiFDRecvParams; + +struct { + MultiFDRecvParams *params; + /* number of created threads */ + int count; +} *multifd_recv_state; + +static void terminate_multifd_recv_threads(Error *errp) +{ + int i; + + for (i = 0; i < multifd_recv_state->count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_mutex_lock(&p->mutex); + p->quit = true; + qemu_sem_post(&p->sem); + qemu_mutex_unlock(&p->mutex); + } +} + +int multifd_load_cleanup(Error **errp) +{ + int i; + int ret = 0; + + if (!migrate_use_multifd()) { + return 0; + } + terminate_multifd_recv_threads(NULL); + for (i = 0; i < multifd_recv_state->count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_thread_join(&p->thread); + qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + g_free(p->name); + p->name = NULL; + } + g_free(multifd_recv_state->params); + multifd_recv_state->params = NULL; + g_free(multifd_recv_state); + multifd_recv_state = NULL; + + return ret; +} + +static void *multifd_recv_thread(void *opaque) +{ + MultiFDRecvParams *p = opaque; + + while (true) { + qemu_mutex_lock(&p->mutex); + if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; + } + qemu_mutex_unlock(&p->mutex); + qemu_sem_wait(&p->sem); + } + + return NULL; +} + +int multifd_load_setup(void) +{ + int thread_count; + uint8_t i; + + if (!migrate_use_multifd()) { + return 0; + } + thread_count = migrate_multifd_channels(); + multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); + multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); + multifd_recv_state->count = 0; + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + p->quit = false; + p->id = i; + p->name = g_strdup_printf("multifdrecv_%d", i); + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + multifd_recv_state->count++; + } + return 0; +} + /** * save_page_header: write page header to wire * @@ -2074,8 +2276,12 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } - /* We can do postcopy, and all the data is postcopiable */ - *postcopiable_pending += remaining_size; + if (migrate_postcopy_ram()) { + /* We can do postcopy, and all the data is postcopiable */ + *postcopiable_pending += remaining_size; + } else { + *non_postcopiable_pending += remaining_size; + } } static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) @@ -2647,11 +2853,17 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) return ret; } +static bool ram_has_postcopy(void *opaque) +{ + return migrate_postcopy_ram(); +} + static SaveVMHandlers savevm_ram_handlers = { .save_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, .save_live_complete_postcopy = ram_save_complete, .save_live_complete_precopy = ram_save_complete, + .has_postcopy = ram_has_postcopy, .save_live_pending = ram_save_pending, .load_state = ram_load, .save_cleanup = ram_save_cleanup, diff --git a/migration/ram.h b/migration/ram.h index c081fde86c..4a72d66503 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -39,6 +39,11 @@ int64_t xbzrle_cache_resize(int64_t new_size); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); +int multifd_save_setup(void); +int multifd_save_cleanup(Error **errp); +int multifd_load_setup(void); +int multifd_load_cleanup(Error **errp); + uint64_t ram_pagesize_summary(void); int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); void acct_update_position(QEMUFile *f, size_t size, bool zero); diff --git a/migration/savevm.c b/migration/savevm.c index 7a55023d1a..2478352baf 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -89,7 +89,7 @@ static struct mig_cmd_args { [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" }, [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" }, [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" }, - [MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" }, + [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" }, [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" }, [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" }, [MIG_CMD_POSTCOPY_RAM_DISCARD] = { @@ -98,6 +98,23 @@ static struct mig_cmd_args { [MIG_CMD_MAX] = { .len = -1, .name = "MAX" }, }; +/* Note for MIG_CMD_POSTCOPY_ADVISE: + * The format of arguments is depending on postcopy mode: + * - postcopy RAM only + * uint64_t host page size + * uint64_t taget page size + * + * - postcopy RAM and postcopy dirty bitmaps + * format is the same as for postcopy RAM only + * + * - postcopy dirty bitmaps only + * Nothing. Command length field is 0. + * + * Be careful: adding a new postcopy entity with some other parameters should + * not break format self-description ability. Good way is to introduce some + * generic extendable format with an exception for two old entities. + */ + static int announce_self_create(uint8_t *buf, uint8_t *mac_addr) { @@ -861,12 +878,17 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) /* Send prior to any postcopy transfer */ void qemu_savevm_send_postcopy_advise(QEMUFile *f) { - uint64_t tmp[2]; - tmp[0] = cpu_to_be64(ram_pagesize_summary()); - tmp[1] = cpu_to_be64(qemu_target_page_size()); + if (migrate_postcopy_ram()) { + uint64_t tmp[2]; + tmp[0] = cpu_to_be64(ram_pagesize_summary()); + tmp[1] = cpu_to_be64(qemu_target_page_size()); - trace_qemu_savevm_send_postcopy_advise(); - qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); + trace_qemu_savevm_send_postcopy_advise(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, + 16, (uint8_t *)tmp); + } else { + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL); + } } /* Sent prior to starting the destination running in postcopy, discard pages @@ -1008,7 +1030,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) * call that's already run, it might get confused if we call * iterate afterwards. */ - if (postcopy && !se->ops->save_live_complete_postcopy) { + if (postcopy && + !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) { continue; } if (qemu_file_rate_limit(f)) { @@ -1097,7 +1120,8 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || - (in_postcopy && se->ops->save_live_complete_postcopy) || + (in_postcopy && se->ops->has_postcopy && + se->ops->has_postcopy(se->opaque)) || (in_postcopy && !iterable_only) || !se->ops->save_live_complete_precopy) { continue; @@ -1352,7 +1376,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } - if (!postcopy_ram_supported_by_host()) { + if (!migrate_postcopy_ram()) { + return 0; + } + + if (!postcopy_ram_supported_by_host(mis)) { postcopy_state_set(POSTCOPY_INCOMING_NONE); return -1; } @@ -1562,7 +1590,9 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * A rare case, we entered listen without having to do any discards, * so do the setup that's normally done at the time of the 1st discard. */ - postcopy_ram_prepare_discard(mis); + if (migrate_postcopy_ram()) { + postcopy_ram_prepare_discard(mis); + } } /* @@ -1570,8 +1600,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * However, at this point the CPU shouldn't be running, and the IO * shouldn't be doing anything yet so don't actually expect requests */ - if (postcopy_ram_enable_notify(mis)) { - return -1; + if (migrate_postcopy_ram()) { + if (postcopy_ram_enable_notify(mis)) { + return -1; + } } if (mis->have_listen_thread) { diff --git a/migration/socket.c b/migration/socket.c index 757d3821a1..dee869044a 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -152,9 +152,13 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc, object_unref(OBJECT(sioc)); out: - /* Close listening socket as its no longer needed */ - qio_channel_close(ioc, NULL); - return FALSE; /* unregister */ + if (migration_has_all_channels()) { + /* Close listening socket as its no longer needed */ + qio_channel_close(ioc, NULL); + return G_SOURCE_REMOVE; + } else { + return G_SOURCE_CONTINUE; + } } diff --git a/qapi/migration.json b/qapi/migration.json index ee2b3b8733..f8b365e3f5 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -341,12 +341,14 @@ # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # +# @x-multifd: Use more than one fd for migration (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path' ] } + 'block', 'return-path', 'x-multifd' ] } ## # @MigrationCapabilityStatus: @@ -464,13 +466,22 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. The +# default value is 2 (since 2.11) +# +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## { 'enum': 'MigrationParameter', 'data': ['compress-level', 'compress-threads', 'decompress-threads', 'cpu-throttle-initial', 'cpu-throttle-increment', 'tls-creds', 'tls-hostname', 'max-bandwidth', - 'downtime-limit', 'x-checkpoint-delay', 'block-incremental' ] } + 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', + 'x-multifd-channels', 'x-multifd-page-count' ] } ## # @MigrateSetParameters: @@ -526,6 +537,14 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. The +# default value is 2 (since 2.11) +# +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## # TODO either fuse back into MigrationParameters, or make @@ -541,7 +560,9 @@ '*max-bandwidth': 'int', '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', - '*block-incremental': 'bool' } } + '*block-incremental': 'bool', + '*x-multifd-channels': 'int', + '*x-multifd-page-count': 'int' } } ## # @migrate-set-parameters: @@ -612,6 +633,14 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. +# The default value is 2 (since 2.11) +# +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', @@ -625,7 +654,9 @@ '*max-bandwidth': 'int', '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', - '*block-incremental': 'bool' } } + '*block-incremental': 'bool' , + '*x-multifd-channels': 'int', + '*x-multifd-page-count': 'int' } } ## # @query-migrate-parameters: diff --git a/util/bitmap.c b/util/bitmap.c index efced9a7d8..cb618c65a5 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -355,3 +355,50 @@ int slow_bitmap_intersects(const unsigned long *bitmap1, } return 0; } + +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + long k, lim = nbits / BITS_PER_LONG, result = 0; + + for (k = 0; k < lim; k++) { + result += ctpopl(bitmap[k]); + } + + if (nbits % BITS_PER_LONG) { + result += ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits)); + } + + return result; +} + +static void bitmap_to_from_le(unsigned long *dst, + const unsigned long *src, long nbits) +{ + long len = BITS_TO_LONGS(nbits); + +#ifdef HOST_WORDS_BIGENDIAN + long index; + + for (index = 0; index < len; index++) { +# if HOST_LONG_BITS == 64 + dst[index] = bswap64(src[index]); +# else + dst[index] = bswap32(src[index]); +# endif + } +#else + memcpy(dst, src, len * sizeof(unsigned long)); +#endif +} + +void bitmap_from_le(unsigned long *dst, const unsigned long *src, + long nbits) +{ + bitmap_to_from_le(dst, src, nbits); +} + +void bitmap_to_le(unsigned long *dst, const unsigned long *src, + long nbits) +{ + bitmap_to_from_le(dst, src, nbits); +} diff --git a/util/bitops.c b/util/bitops.c index b0c35dd5f1..f2364015c4 100644 --- a/util/bitops.c +++ b/util/bitops.c @@ -14,15 +14,13 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - /* * Find the next set bit in a memory region. */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; @@ -87,7 +85,7 @@ found_middle: unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; |