/* * Multifd RAM migration without compression * * Copyright (c) 2019-2020 Red Hat Inc * * Authors: * Juan Quintela * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ #include "qemu/osdep.h" #include "exec/ramblock.h" #include "exec/target_page.h" #include "file.h" #include "multifd.h" #include "options.h" #include "qapi/error.h" #include "qemu/cutils.h" #include "qemu/error-report.h" #include "trace.h" static MultiFDSendData *multifd_ram_send; size_t multifd_ram_payload_size(void) { uint32_t n = multifd_ram_page_count(); /* * We keep an array of page offsets at the end of MultiFDPages_t, * add space for it in the allocation. */ return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t); } void multifd_ram_save_setup(void) { multifd_ram_send = multifd_send_data_alloc(); } void multifd_ram_save_cleanup(void) { g_free(multifd_ram_send); multifd_ram_send = NULL; } static void multifd_set_file_bitmap(MultiFDSendParams *p) { MultiFDPages_t *pages = &p->data->u.ram; assert(pages->block); for (int i = 0; i < pages->normal_num; i++) { ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true); } for (int i = pages->normal_num; i < pages->num; i++) { ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false); } } static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp) { uint32_t page_count = multifd_ram_page_count(); if (migrate_zero_copy_send()) { p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; } if (!migrate_mapped_ram()) { /* We need one extra place for the packet header */ p->iov = g_new0(struct iovec, page_count + 1); } else { p->iov = g_new0(struct iovec, page_count); } return 0; } static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) { g_free(p->iov); p->iov = NULL; return; } static void multifd_send_prepare_iovs(MultiFDSendParams *p) { MultiFDPages_t *pages = &p->data->u.ram; uint32_t page_size = multifd_ram_page_size(); for (int i = 0; i < pages->normal_num; i++) { p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; p->iov[p->iovs_num].iov_len = page_size; p->iovs_num++; } p->next_packet_size = pages->normal_num * page_size; } static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) { bool use_zero_copy_send = migrate_zero_copy_send(); int ret; multifd_send_zero_page_detect(p); if (migrate_mapped_ram()) { multifd_send_prepare_iovs(p); multifd_set_file_bitmap(p); return 0; } if (!use_zero_copy_send) { /* * Only !zerocopy needs the header in IOV; zerocopy will * send it separately. */ multifd_send_prepare_header(p); } multifd_send_prepare_iovs(p); p->flags |= MULTIFD_FLAG_NOCOMP; multifd_send_fill_packet(p); if (use_zero_copy_send) { /* Send header first, without zerocopy */ ret = qio_channel_write_all(p->c, (void *)p->packet, p->packet_len, errp); if (ret != 0) { return -1; } } return 0; } static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) { p->iov = g_new0(struct iovec, multifd_ram_page_count()); return 0; } static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p) { g_free(p->iov); p->iov = NULL; } static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp) { uint32_t flags; if (migrate_mapped_ram()) { return multifd_file_recv_data(p, errp); } flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; if (flags != MULTIFD_FLAG_NOCOMP) { error_setg(errp, "multifd %u: flags received %x flags expected %x", p->id, flags, MULTIFD_FLAG_NOCOMP); return -1; } multifd_recv_zero_page_process(p); if (!p->normal_num) { return 0; } for (int i = 0; i < p->normal_num; i++) { p->iov[i].iov_base = p->host + p->normal[i]; p->iov[i].iov_len = multifd_ram_page_size(); ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); } return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); } static void multifd_pages_reset(MultiFDPages_t *pages) { /* * We don't need to touch offset[] array, because it will be * overwritten later when reused. */ pages->num = 0; pages->normal_num = 0; pages->block = NULL; } void multifd_ram_fill_packet(MultiFDSendParams *p) { MultiFDPacket_t *packet = p->packet; MultiFDPages_t *pages = &p->data->u.ram; uint32_t zero_num = pages->num - pages->normal_num; packet->pages_alloc = cpu_to_be32(multifd_ram_page_count()); packet->normal_pages = cpu_to_be32(pages->normal_num); packet->zero_pages = cpu_to_be32(zero_num); if (pages->block) { pstrcpy(packet->ramblock, sizeof(packet->ramblock), pages->block->idstr); } for (int i = 0; i < pages->num; i++) { /* there are architectures where ram_addr_t is 32 bit */ uint64_t temp = pages->offset[i]; packet->offset[i] = cpu_to_be64(temp); } trace_multifd_send_ram_fill(p->id, pages->normal_num, zero_num); } int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp) { MultiFDPacket_t *packet = p->packet; uint32_t page_count = multifd_ram_page_count(); uint32_t page_size = multifd_ram_page_size(); uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc); int i; if (pages_per_packet > page_count) { error_setg(errp, "multifd: received packet with %u pages, expected %u", pages_per_packet, page_count); return -1; } p->normal_num = be32_to_cpu(packet->normal_pages); if (p->normal_num > pages_per_packet) { error_setg(errp, "multifd: received packet with %u non-zero pages, " "which exceeds maximum expected pages %u", p->normal_num, pages_per_packet); return -1; } p->zero_num = be32_to_cpu(packet->zero_pages); if (p->zero_num > pages_per_packet - p->normal_num) { error_setg(errp, "multifd: received packet with %u zero pages, expected maximum %u", p->zero_num, pages_per_packet - p->normal_num); return -1; } if (p->normal_num == 0 && p->zero_num == 0) { return 0; } /* make sure that ramblock is 0 terminated */ packet->ramblock[255] = 0; p->block = qemu_ram_block_by_name(packet->ramblock); if (!p->block) { error_setg(errp, "multifd: unknown ram block %s", packet->ramblock); return -1; } p->host = p->block->host; for (i = 0; i < p->normal_num; i++) { uint64_t offset = be64_to_cpu(packet->offset[i]); if (offset > (p->block->used_length - page_size)) { error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", offset, p->block->used_length); return -1; } p->normal[i] = offset; } for (i = 0; i < p->zero_num; i++) { uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); if (offset > (p->block->used_length - page_size)) { error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", offset, p->block->used_length); return -1; } p->zero[i] = offset; } return 0; } static inline bool multifd_queue_empty(MultiFDPages_t *pages) { return pages->num == 0; } static inline bool multifd_queue_full(MultiFDPages_t *pages) { return pages->num == multifd_ram_page_count(); } static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) { pages->offset[pages->num++] = offset; } /* Returns true if enqueue successful, false otherwise */ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) { MultiFDPages_t *pages; retry: pages = &multifd_ram_send->u.ram; if (multifd_payload_empty(multifd_ram_send)) { multifd_pages_reset(pages); multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM); } /* If the queue is empty, we can already enqueue now */ if (multifd_queue_empty(pages)) { pages->block = block; multifd_enqueue(pages, offset); return true; } /* * Not empty, meanwhile we need a flush. It can because of either: * * (1) The page is not on the same ramblock of previous ones, or, * (2) The queue is full. * * After flush, always retry. */ if (pages->block != block || multifd_queue_full(pages)) { if (!multifd_send(&multifd_ram_send)) { return false; } goto retry; } /* Not empty, and we still have space, do it! */ multifd_enqueue(pages, offset); return true; } int multifd_ram_flush_and_sync(void) { if (!migrate_multifd()) { return 0; } if (!multifd_payload_empty(multifd_ram_send)) { if (!multifd_send(&multifd_ram_send)) { error_report("%s: multifd_send fail", __func__); return -1; } } return multifd_send_sync_main(); } bool multifd_send_prepare_common(MultiFDSendParams *p) { MultiFDPages_t *pages = &p->data->u.ram; multifd_send_zero_page_detect(p); if (!pages->normal_num) { p->next_packet_size = 0; return false; } multifd_send_prepare_header(p); return true; } static const MultiFDMethods multifd_nocomp_ops = { .send_setup = multifd_nocomp_send_setup, .send_cleanup = multifd_nocomp_send_cleanup, .send_prepare = multifd_nocomp_send_prepare, .recv_setup = multifd_nocomp_recv_setup, .recv_cleanup = multifd_nocomp_recv_cleanup, .recv = multifd_nocomp_recv }; static void multifd_nocomp_register(void) { multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops); } migration_init(multifd_nocomp_register);