Diffstat (limited to 'migration/multifd.c')
-rw-r--r-- | migration/multifd.c | 417 |
1 file changed, 336 insertions, 81 deletions
diff --git a/migration/multifd.c b/migration/multifd.c
index 6c07f19af1..d4a44da559 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -17,7 +17,8 @@
 #include "exec/ramblock.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
-#include "ram.h"
+#include "fd.h"
+#include "file.h"
 #include "migration.h"
 #include "migration-stats.h"
 #include "socket.h"
@@ -28,6 +29,7 @@
 #include "threadinfo.h"
 #include "options.h"
 #include "qemu/yank.h"
+#include "io/channel-file.h"
 #include "io/channel-socket.h"
 #include "yank_functions.h"
@@ -81,9 +83,13 @@ struct {
 
 struct {
     MultiFDRecvParams *params;
+    MultiFDRecvData *data;
     /* number of created threads */
     int count;
-    /* syncs main thread and channels */
+    /*
+     * This is always posted by the recv threads, the migration thread
+     * uses it to wait for recv threads to finish assigned tasks.
+     */
     QemuSemaphore sem_sync;
     /* global number of generated multifd packets */
     uint64_t packet_num;
@@ -92,6 +98,27 @@ struct {
     MultiFDMethods *ops;
 } *multifd_recv_state;
 
+static bool multifd_use_packets(void)
+{
+    return !migrate_mapped_ram();
+}
+
+void multifd_send_channel_created(void)
+{
+    qemu_sem_post(&multifd_send_state->channels_created);
+}
+
+static void multifd_set_file_bitmap(MultiFDSendParams *p)
+{
+    MultiFDPages_t *pages = p->pages;
+
+    assert(pages->block);
+
+    for (int i = 0; i < p->pages->num; i++) {
+        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i]);
+    }
+}
+
 /* Multifd without compression */
 
 /**
@@ -122,6 +149,19 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
     return;
 }
 
+static void multifd_send_prepare_iovs(MultiFDSendParams *p)
+{
+    MultiFDPages_t *pages = p->pages;
+
+    for (int i = 0; i < pages->num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
+        p->iov[p->iovs_num].iov_len = p->page_size;
+        p->iovs_num++;
+    }
+
+    p->next_packet_size = pages->num * p->page_size;
+}
+
 /**
  * nocomp_send_prepare: prepare date to be able to send
  *
@@ -136,9 +176,15 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
 static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
 {
     bool use_zero_copy_send = migrate_zero_copy_send();
-    MultiFDPages_t *pages = p->pages;
     int ret;
 
+    if (!multifd_use_packets()) {
+        multifd_send_prepare_iovs(p);
+        multifd_set_file_bitmap(p);
+
+        return 0;
+    }
+
     if (!use_zero_copy_send) {
         /*
          * Only !zerocopy needs the header in IOV; zerocopy will
@@ -147,13 +193,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
         multifd_send_prepare_header(p);
     }
 
-    for (int i = 0; i < pages->num; i++) {
-        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
-        p->iov[p->iovs_num].iov_len = p->page_size;
-        p->iovs_num++;
-    }
-
-    p->next_packet_size = pages->num * p->page_size;
+    multifd_send_prepare_iovs(p);
 
     p->flags |= MULTIFD_FLAG_NOCOMP;
     multifd_send_fill_packet(p);
@@ -197,7 +237,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p)
 }
 
 /**
- * nocomp_recv_pages: read the data from the channel into actual pages
+ * nocomp_recv: read the data from the channel
  *
 * For no compression we just need to read things into the correct place.
  *
@@ -206,9 +246,15 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p)
  * @p: Params for the channel that we are using
  * @errp: pointer to an error
  */
-static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
+static int nocomp_recv(MultiFDRecvParams *p, Error **errp)
 {
-    uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+    uint32_t flags;
+
+    if (!multifd_use_packets()) {
+        return multifd_file_recv_data(p, errp);
+    }
+
+    flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
 
     if (flags != MULTIFD_FLAG_NOCOMP) {
         error_setg(errp, "multifd %u: flags received %x flags expected %x",
@@ -228,7 +274,7 @@ static MultiFDMethods multifd_nocomp_ops = {
     .send_prepare = nocomp_send_prepare,
     .recv_setup = nocomp_recv_setup,
     .recv_cleanup = nocomp_recv_cleanup,
-    .recv_pages = nocomp_recv_pages
+    .recv = nocomp_recv
 };
 
 static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
@@ -663,6 +709,19 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
 {
     if (p->c) {
         migration_ioc_unregister_yank(p->c);
+        /*
+         * An explicit close() on the channel here is normally not
+         * required, but can be helpful for "file:" iochannels, where it
+         * will include fdatasync() to make sure the data is flushed to the
+         * disk backend.
+         *
+         * The object_unref() cannot guarantee that because: (1) finalize()
+         * of the iochannel is only triggered on the last reference, and
+         * it's not guaranteed that we always hold the last refcount when
+         * reaching here, and, (2) even if finalize() is invoked, it only
+         * does a close(fd) without data flush.
+         */
+        qio_channel_close(p->c, &error_abort);
         object_unref(OBJECT(p->c));
         p->c = NULL;
     }
@@ -684,6 +743,8 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
 
 static void multifd_send_cleanup_state(void)
 {
+    file_cleanup_outgoing_migration();
+    fd_cleanup_outgoing_migration();
     socket_cleanup_outgoing_migration();
     qemu_sem_destroy(&multifd_send_state->channels_created);
     qemu_sem_destroy(&multifd_send_state->channels_ready);
@@ -795,15 +856,18 @@ static void *multifd_send_thread(void *opaque)
     MigrationThread *thread = NULL;
     Error *local_err = NULL;
     int ret = 0;
+    bool use_packets = multifd_use_packets();
 
     thread = migration_threads_add(p->name, qemu_get_thread_id());
 
     trace_multifd_send_thread_start(p->id);
     rcu_register_thread();
 
-    if (multifd_send_initial_packet(p, &local_err) < 0) {
-        ret = -1;
-        goto out;
+    if (use_packets) {
+        if (multifd_send_initial_packet(p, &local_err) < 0) {
+            ret = -1;
+            goto out;
+        }
     }
 
     while (true) {
@@ -829,8 +893,15 @@ static void *multifd_send_thread(void *opaque)
                 break;
             }
 
-            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
-                                              0, p->write_flags, &local_err);
+            if (migrate_mapped_ram()) {
+                ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
+                                              p->pages->block, &local_err);
+            } else {
+                ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num,
+                                                  NULL, 0, p->write_flags,
+                                                  &local_err);
+            }
+
             if (ret != 0) {
                 break;
             }
@@ -854,16 +925,20 @@ static void *multifd_send_thread(void *opaque)
              * it doesn't require explicit memory barriers.
             */
             assert(qatomic_read(&p->pending_sync));
-            p->flags = MULTIFD_FLAG_SYNC;
-            multifd_send_fill_packet(p);
-            ret = qio_channel_write_all(p->c, (void *)p->packet,
-                                        p->packet_len, &local_err);
-            if (ret != 0) {
-                break;
+
+            if (use_packets) {
+                p->flags = MULTIFD_FLAG_SYNC;
+                multifd_send_fill_packet(p);
+                ret = qio_channel_write_all(p->c, (void *)p->packet,
+                                            p->packet_len, &local_err);
+                if (ret != 0) {
+                    break;
+                }
+                /* p->next_packet_size will always be zero for a SYNC packet */
+                stat64_add(&mig_stats.multifd_bytes, p->packet_len);
+                p->flags = 0;
             }
-            /* p->next_packet_size will always be zero for a SYNC packet */
-            stat64_add(&mig_stats.multifd_bytes, p->packet_len);
-            p->flags = 0;
+
             qatomic_set(&p->pending_sync, false);
             qemu_sem_post(&p->sem_sync);
         }
@@ -939,7 +1014,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p,
     return true;
 }
 
-static void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc)
+void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc)
 {
     qio_channel_set_delay(ioc, false);
 
@@ -990,7 +1065,7 @@ out:
      * Here we're not interested whether creation succeeded, only that
      * it happened at all.
      */
-    qemu_sem_post(&multifd_send_state->channels_created);
+    multifd_send_channel_created();
 
     if (ret) {
         return;
@@ -1007,9 +1082,14 @@ out:
     error_free(local_err);
 }
 
-static void multifd_new_send_channel_create(gpointer opaque)
+static bool multifd_new_send_channel_create(gpointer opaque, Error **errp)
 {
+    if (!multifd_use_packets()) {
+        return file_send_channel_create(opaque, errp);
+    }
+
     socket_send_channel_create(multifd_new_send_channel_async, opaque);
+    return true;
 }
 
 bool multifd_send_setup(void)
@@ -1018,6 +1098,7 @@ bool multifd_send_setup(void)
     Error *local_err = NULL;
     int thread_count, ret = 0;
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
+    bool use_packets = multifd_use_packets();
     uint8_t i;
 
     if (!migrate_multifd()) {
@@ -1040,18 +1121,27 @@ bool multifd_send_setup(void)
         qemu_sem_init(&p->sem_sync, 0);
         p->id = i;
         p->pages = multifd_pages_init(page_count);
-        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(uint64_t) * page_count;
-        p->packet = g_malloc0(p->packet_len);
-        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
-        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
+
+        if (use_packets) {
+            p->packet_len = sizeof(MultiFDPacket_t)
+                          + sizeof(uint64_t) * page_count;
+            p->packet = g_malloc0(p->packet_len);
+            p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
+            p->packet->version = cpu_to_be32(MULTIFD_VERSION);
+
+            /* We need one extra place for the packet header */
+            p->iov = g_new0(struct iovec, page_count + 1);
+        } else {
+            p->iov = g_new0(struct iovec, page_count);
+        }
         p->name = g_strdup_printf("multifdsend_%d", i);
-        /* We need one extra place for the packet header */
-        p->iov = g_new0(struct iovec, page_count + 1);
         p->page_size = qemu_target_page_size();
         p->page_count = page_count;
         p->write_flags = 0;
-        multifd_new_send_channel_create(p);
+
+        if (!multifd_new_send_channel_create(p, &local_err)) {
+            return false;
+        }
     }
 
     /*
@@ -1083,6 +1173,57 @@ bool multifd_send_setup(void)
     return true;
 }
 
+bool multifd_recv(void)
+{
+    int i;
+    static int next_recv_channel;
+    MultiFDRecvParams *p = NULL;
+    MultiFDRecvData *data = multifd_recv_state->data;
+
+    /*
+     * next_channel can remain from a previous migration that was
+     * using more channels, so ensure it doesn't overflow if the
+     * limit is lower now.
+     */
+    next_recv_channel %= migrate_multifd_channels();
+    for (i = next_recv_channel;; i = (i + 1) % migrate_multifd_channels()) {
+        if (multifd_recv_should_exit()) {
+            return false;
+        }
+
+        p = &multifd_recv_state->params[i];
+
+        if (qatomic_read(&p->pending_job) == false) {
+            next_recv_channel = (i + 1) % migrate_multifd_channels();
+            break;
+        }
+    }
+
+    /*
+     * Order pending_job read before manipulating p->data below. Pairs
+     * with qatomic_store_release() at multifd_recv_thread().
+     */
+    smp_mb_acquire();
+
+    assert(!p->data->size);
+    multifd_recv_state->data = p->data;
+    p->data = data;
+
+    /*
+     * Order p->data update before setting pending_job. Pairs with
+     * qatomic_load_acquire() at multifd_recv_thread().
+     */
+    qatomic_store_release(&p->pending_job, true);
+    qemu_sem_post(&p->sem);
+
+    return true;
+}
+
+MultiFDRecvData *multifd_get_recv_data(void)
+{
+    return multifd_recv_state->data;
+}
+
 static void multifd_recv_terminate_threads(Error *err)
 {
     int i;
@@ -1107,10 +1248,27 @@ static void multifd_recv_terminate_threads(Error *err)
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
         /*
-         * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
-         * however try to wakeup it without harm in cleanup phase.
+         * The migration thread and channels interact differently
+         * depending on the presence of packets.
          */
-        qemu_sem_post(&p->sem_sync);
+        if (multifd_use_packets()) {
+            /*
+             * The channel receives as long as there are packets. When
+             * packets end (i.e. MULTIFD_FLAG_SYNC is reached), the
+             * channel waits for the migration thread to sync. If the
+             * sync never happens, do it here.
+             */
+            qemu_sem_post(&p->sem_sync);
+        } else {
+            /*
+             * The channel waits for the migration thread to give it
+             * work. When the migration thread runs out of work, it
+             * releases the channel and waits for any pending work to
+             * finish. If we reach here (e.g. due to error) before the
+             * work runs out, release the channel.
+             */
+            qemu_sem_post(&p->sem);
+        }
 
         /*
         * We could arrive here for two reasons:
@@ -1138,6 +1296,7 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
     p->c = NULL;
     qemu_mutex_destroy(&p->mutex);
     qemu_sem_destroy(&p->sem_sync);
+    qemu_sem_destroy(&p->sem);
     g_free(p->name);
     p->name = NULL;
     p->packet_len = 0;
@@ -1155,6 +1314,8 @@ static void multifd_recv_cleanup_state(void)
     qemu_sem_destroy(&multifd_recv_state->sem_sync);
     g_free(multifd_recv_state->params);
     multifd_recv_state->params = NULL;
+    g_free(multifd_recv_state->data);
+    multifd_recv_state->data = NULL;
     g_free(multifd_recv_state);
     multifd_recv_state = NULL;
 }
@@ -1182,18 +1343,53 @@ void multifd_recv_cleanup(void)
 
 void multifd_recv_sync_main(void)
 {
+    int thread_count = migrate_multifd_channels();
+    bool file_based = !multifd_use_packets();
     int i;
 
     if (!migrate_multifd()) {
         return;
     }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
-        trace_multifd_recv_sync_main_wait(p->id);
+
+    /*
+     * File-based channels don't use packets and therefore need to
+     * wait for more work. Release them to start the sync.
+     */
+    if (file_based) {
+        for (i = 0; i < thread_count; i++) {
+            MultiFDRecvParams *p = &multifd_recv_state->params[i];
+
+            trace_multifd_recv_sync_main_signal(p->id);
+            qemu_sem_post(&p->sem);
+        }
+    }
+
+    /*
+     * Initiate the synchronization by waiting for all channels.
+     *
+     * For socket-based migration this means each channel has received
+     * the SYNC packet on the stream.
+     *
+     * For file-based migration this means each channel is done with
+     * the work (pending_job=false).
+     */
+    for (i = 0; i < thread_count; i++) {
+        trace_multifd_recv_sync_main_wait(i);
         qemu_sem_wait(&multifd_recv_state->sem_sync);
     }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
+
+    if (file_based) {
+        /*
+         * For file-based loading is done in one iteration. We're
+         * done.
+         */
+        return;
+    }
+
+    /*
+     * Sync done. Release the channels for the next iteration.
+     */
+    for (i = 0; i < thread_count; i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
         WITH_QEMU_LOCK_GUARD(&p->mutex) {
@@ -1211,46 +1407,87 @@ static void *multifd_recv_thread(void *opaque)
 {
     MultiFDRecvParams *p = opaque;
     Error *local_err = NULL;
+    bool use_packets = multifd_use_packets();
     int ret;
 
     trace_multifd_recv_thread_start(p->id);
     rcu_register_thread();
 
     while (true) {
-        uint32_t flags;
+        uint32_t flags = 0;
+        bool has_data = false;
+        p->normal_num = 0;
 
-        if (multifd_recv_should_exit()) {
-            break;
-        }
+        if (use_packets) {
+            if (multifd_recv_should_exit()) {
+                break;
+            }
 
-        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
-                                       p->packet_len, &local_err);
-        if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
-            break;
-        }
+            ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                            p->packet_len, &local_err);
+            if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
+                break;
+            }
 
-        qemu_mutex_lock(&p->mutex);
-        ret = multifd_recv_unfill_packet(p, &local_err);
-        if (ret) {
+            qemu_mutex_lock(&p->mutex);
+            ret = multifd_recv_unfill_packet(p, &local_err);
+            if (ret) {
+                qemu_mutex_unlock(&p->mutex);
+                break;
+            }
+
+            flags = p->flags;
+            /* recv methods don't know how to handle the SYNC flag */
+            p->flags &= ~MULTIFD_FLAG_SYNC;
+            has_data = !!p->normal_num;
             qemu_mutex_unlock(&p->mutex);
-            break;
-        }
+        } else {
+            /*
+             * No packets, so we need to wait for the vmstate code to
+             * give us work.
+             */
+            qemu_sem_wait(&p->sem);
+
+            if (multifd_recv_should_exit()) {
+                break;
+            }
+
+            /* pairs with qatomic_store_release() at multifd_recv() */
+            if (!qatomic_load_acquire(&p->pending_job)) {
+                /*
+                 * Migration thread did not send work, this is
+                 * equivalent to pending_sync on the sending
+                 * side. Post sem_sync to notify we reached this
+                 * point.
+                 */
+                qemu_sem_post(&multifd_recv_state->sem_sync);
+                continue;
+            }
 
-        flags = p->flags;
-        /* recv methods don't know how to handle the SYNC flag */
-        p->flags &= ~MULTIFD_FLAG_SYNC;
-        qemu_mutex_unlock(&p->mutex);
+            has_data = !!p->data->size;
+        }
 
-        if (p->normal_num) {
-            ret = multifd_recv_state->ops->recv_pages(p, &local_err);
+        if (has_data) {
+            ret = multifd_recv_state->ops->recv(p, &local_err);
             if (ret != 0) {
                 break;
            }
         }
 
-        if (flags & MULTIFD_FLAG_SYNC) {
-            qemu_sem_post(&multifd_recv_state->sem_sync);
-            qemu_sem_wait(&p->sem_sync);
+        if (use_packets) {
+            if (flags & MULTIFD_FLAG_SYNC) {
+                qemu_sem_post(&multifd_recv_state->sem_sync);
+                qemu_sem_wait(&p->sem_sync);
+            }
+        } else {
+            p->total_normal_pages += p->data->size / qemu_target_page_size();
+            p->data->size = 0;
+            /*
+             * Order data->size update before clearing
+             * pending_job. Pairs with smp_mb_acquire() at
+             * multifd_recv().
+             */
+            qatomic_store_release(&p->pending_job, false);
        }
     }
 
@@ -1269,6 +1506,7 @@ int multifd_recv_setup(Error **errp)
 {
     int thread_count;
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
+    bool use_packets = multifd_use_packets();
     uint8_t i;
 
     /*
@@ -1282,6 +1520,10 @@ int multifd_recv_setup(Error **errp)
     thread_count = migrate_multifd_channels();
     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
+
+    multifd_recv_state->data = g_new0(MultiFDRecvData, 1);
+    multifd_recv_state->data->size = 0;
+
     qatomic_set(&multifd_recv_state->count, 0);
     qatomic_set(&multifd_recv_state->exiting, 0);
     qemu_sem_init(&multifd_recv_state->sem_sync, 0);
@@ -1292,10 +1534,18 @@ int multifd_recv_setup(Error **errp)
 
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem_sync, 0);
+        qemu_sem_init(&p->sem, 0);
+        p->pending_job = false;
         p->id = i;
-        p->packet_len = sizeof(MultiFDPacket_t)
-                      + sizeof(uint64_t) * page_count;
-        p->packet = g_malloc0(p->packet_len);
+
+        p->data = g_new0(MultiFDRecvData, 1);
+        p->data->size = 0;
+
+        if (use_packets) {
+            p->packet_len = sizeof(MultiFDPacket_t)
+                          + sizeof(uint64_t) * page_count;
+            p->packet = g_malloc0(p->packet_len);
+        }
         p->name = g_strdup_printf("multifdrecv_%d", i);
         p->iov = g_new0(struct iovec, page_count);
         p->normal = g_new0(ram_addr_t, page_count);
@@ -1339,18 +1589,23 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
     MultiFDRecvParams *p;
     Error *local_err = NULL;
+    bool use_packets = multifd_use_packets();
     int id;
 
-    id = multifd_recv_initial_packet(ioc, &local_err);
-    if (id < 0) {
-        multifd_recv_terminate_threads(local_err);
-        error_propagate_prepend(errp, local_err,
-                                "failed to receive packet"
-                                " via multifd channel %d: ",
-                                qatomic_read(&multifd_recv_state->count));
-        return;
+    if (use_packets) {
+        id = multifd_recv_initial_packet(ioc, &local_err);
+        if (id < 0) {
+            multifd_recv_terminate_threads(local_err);
+            error_propagate_prepend(errp, local_err,
                                     "failed to receive packet"
                                     " via multifd channel %d: ",
                                     qatomic_read(&multifd_recv_state->count));
+            return;
+        }
+        trace_multifd_recv_new_channel(id);
+    } else {
+        id = qatomic_read(&multifd_recv_state->count);
     }
-    trace_multifd_recv_new_channel(id);
 
     p = &multifd_recv_state->params[id];
     if (p->c != NULL) {