diff options
Diffstat (limited to 'migration/ram.c')
-rw-r--r-- | migration/ram.c | 351 |
1 files changed, 333 insertions, 18 deletions
diff --git a/migration/ram.c b/migration/ram.c index 83fd780fc2..003c28e133 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -94,6 +94,24 @@ #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 /* We can't use any flag that is bigger than 0x200 */ +/* + * mapped-ram migration supports O_DIRECT, so we need to make sure the + * userspace buffer, the IO operation size and the file offset are + * aligned according to the underlying device's block size. The first + * two are already aligned to page size, but we need to add padding to + * the file to align the offset. We cannot read the block size + * dynamically because the migration file can be moved between + * different systems, so use 1M to cover most block sizes and to keep + * the file offset aligned at page size as well. + */ +#define MAPPED_RAM_FILE_OFFSET_ALIGNMENT 0x100000 + +/* + * When doing mapped-ram migration, this is the amount we read from + * the pages region in the migration file at a time. + */ +#define MAPPED_RAM_LOAD_BUF_SIZE 0x100000 + XBZRLECacheStats xbzrle_counters; /* used by the search for pages to send */ @@ -1126,12 +1144,18 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, return 0; } + stat64_add(&mig_stats.zero_pages, 1); + + if (migrate_mapped_ram()) { + /* zero pages are not transferred with mapped-ram */ + clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->file_bmap); + return 1; + } + len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; ram_release_page(pss->block->idstr, offset); - - stat64_add(&mig_stats.zero_pages, 1); ram_transferred_add(len); /* @@ -1189,14 +1213,20 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, { QEMUFile *file = pss->pss_channel; - ram_transferred_add(save_page_header(pss, pss->pss_channel, block, - offset | RAM_SAVE_FLAG_PAGE)); - if (async) { - qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, - migrate_release_ram() && - migration_in_postcopy()); + if (migrate_mapped_ram()) { + qemu_put_buffer_at(file, buf, TARGET_PAGE_SIZE, + block->pages_offset + offset); + set_bit(offset >> TARGET_PAGE_BITS, block->file_bmap); } else { - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + ram_transferred_add(save_page_header(pss, pss->pss_channel, block, + offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, + migrate_release_ram() && + migration_in_postcopy()); + } else { + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } } ram_transferred_add(TARGET_PAGE_SIZE); stat64_add(&mig_stats.normal_pages, 1); @@ -1332,14 +1362,18 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { if (migrate_multifd() && - !migrate_multifd_flush_after_each_section()) { + (!migrate_multifd_flush_after_each_section() || + migrate_mapped_ram())) { QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; int ret = multifd_send_sync_main(); if (ret < 0) { return ret; } - qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); - qemu_fflush(f); + + if (!migrate_mapped_ram()) { + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); + qemu_fflush(f); + } } /* * If memory migration starts over, we will meet a dirtied page @@ -2778,6 +2812,9 @@ static void ram_list_init_bitmaps(void) */ block->bmap = bitmap_new(pages); bitmap_set(block->bmap, 0, pages); + if (migrate_mapped_ram()) { + block->file_bmap = bitmap_new(pages); + } block->clear_bmap_shift = shift; block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift)); } @@ -2915,6 +2952,89 @@ void qemu_guest_free_page_hint(void *addr, size_t len) } } +#define MAPPED_RAM_HDR_VERSION 1 +struct MappedRamHeader { + uint32_t version; + /* + * The target's page size, so we know how many pages are in the + * bitmap. + */ + uint64_t page_size; + /* + * The offset in the migration file where the pages bitmap is + * stored. + */ + uint64_t bitmap_offset; + /* + * The offset in the migration file where the actual pages (data) + * are stored. + */ + uint64_t pages_offset; +} QEMU_PACKED; +typedef struct MappedRamHeader MappedRamHeader; + +static void mapped_ram_setup_ramblock(QEMUFile *file, RAMBlock *block) +{ + g_autofree MappedRamHeader *header = NULL; + size_t header_size, bitmap_size; + long num_pages; + + header = g_new0(MappedRamHeader, 1); + header_size = sizeof(MappedRamHeader); + + num_pages = block->used_length >> TARGET_PAGE_BITS; + bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + /* + * Save the file offsets of where the bitmap and the pages should + * go as they are written at the end of migration and during the + * iterative phase, respectively. + */ + block->bitmap_offset = qemu_get_offset(file) + header_size; + block->pages_offset = ROUND_UP(block->bitmap_offset + + bitmap_size, + MAPPED_RAM_FILE_OFFSET_ALIGNMENT); + + header->version = cpu_to_be32(MAPPED_RAM_HDR_VERSION); + header->page_size = cpu_to_be64(TARGET_PAGE_SIZE); + header->bitmap_offset = cpu_to_be64(block->bitmap_offset); + header->pages_offset = cpu_to_be64(block->pages_offset); + + qemu_put_buffer(file, (uint8_t *) header, header_size); + + /* prepare offset for next ramblock */ + qemu_set_offset(file, block->pages_offset + block->used_length, SEEK_SET); +} + +static bool mapped_ram_read_header(QEMUFile *file, MappedRamHeader *header, + Error **errp) +{ + size_t ret, header_size = sizeof(MappedRamHeader); + + ret = qemu_get_buffer(file, (uint8_t *)header, header_size); + if (ret != header_size) { + error_setg(errp, "Could not read whole mapped-ram migration header " + "(expected %zd, got %zd bytes)", header_size, ret); + return false; + } + + /* migration stream is big-endian */ + header->version = be32_to_cpu(header->version); + + if (header->version > MAPPED_RAM_HDR_VERSION) { + error_setg(errp, "Migration mapped-ram capability version not " + "supported (expected <= %d, got %d)", MAPPED_RAM_HDR_VERSION, + header->version); + return false; + } + + header->page_size = be64_to_cpu(header->page_size); + header->bitmap_offset = be64_to_cpu(header->bitmap_offset); + header->pages_offset = be64_to_cpu(header->pages_offset); + + return true; +} + /* * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code @@ -2970,6 +3090,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) if (migrate_ignore_shared()) { qemu_put_be64(f, block->mr->addr); } + + if (migrate_mapped_ram()) { + mapped_ram_setup_ramblock(f, block); + } } } @@ -2995,7 +3119,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return ret; } - if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && !migrate_multifd_flush_after_each_section() + && !migrate_mapped_ram()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } @@ -3003,6 +3128,33 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return qemu_fflush(f); } +static void ram_save_file_bmap(QEMUFile *f) +{ + RAMBlock *block; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + long num_pages = block->used_length >> TARGET_PAGE_BITS; + long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + qemu_put_buffer_at(f, (uint8_t *)block->file_bmap, bitmap_size, + block->bitmap_offset); + ram_transferred_add(bitmap_size); + + /* + * Free the bitmap here to catch any synchronization issues + * with multifd channels. No channels should be sending pages + * after we've written the bitmap to file. + */ + g_free(block->file_bmap); + block->file_bmap = NULL; + } +} + +void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset) +{ + set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap); +} + /** * ram_save_iterate: iterative stage for migration * @@ -3112,7 +3264,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - if (migrate_multifd() && migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && migrate_multifd_flush_after_each_section() && + !migrate_mapped_ram()) { ret = multifd_send_sync_main(); if (ret < 0) { return ret; @@ -3192,7 +3345,20 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return ret; } - if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { + if (migrate_mapped_ram()) { + ram_save_file_bmap(f); + + if (qemu_file_get_error(f)) { + Error *local_err = NULL; + int err = qemu_file_get_error_obj(f, &local_err); + + error_reportf_err(local_err, "Failed to write bitmap to file: "); + return -err; + } + } + + if (migrate_multifd() && !migrate_multifd_flush_after_each_section() && + !migrate_mapped_ram()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3792,23 +3958,149 @@ void colo_flush_ram_cache(void) trace_colo_flush_ram_cache_end(); } +static size_t ram_load_multifd_pages(void *host_addr, size_t size, + uint64_t offset) +{ + MultiFDRecvData *data = multifd_get_recv_data(); + + data->opaque = host_addr; + data->file_offset = offset; + data->size = size; + + if (!multifd_recv()) { + return 0; + } + + return size; +} + +static bool read_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block, + long num_pages, unsigned long *bitmap, + Error **errp) +{ + ERRP_GUARD(); + unsigned long set_bit_idx, clear_bit_idx; + ram_addr_t offset; + void *host; + size_t read, unread, size; + + for (set_bit_idx = find_first_bit(bitmap, num_pages); + set_bit_idx < num_pages; + set_bit_idx = find_next_bit(bitmap, num_pages, clear_bit_idx + 1)) { + + clear_bit_idx = find_next_zero_bit(bitmap, num_pages, set_bit_idx + 1); + + unread = TARGET_PAGE_SIZE * (clear_bit_idx - set_bit_idx); + offset = set_bit_idx << TARGET_PAGE_BITS; + + while (unread > 0) { + host = host_from_ram_block_offset(block, offset); + if (!host) { + error_setg(errp, "page outside of ramblock %s range", + block->idstr); + return false; + } + + size = MIN(unread, MAPPED_RAM_LOAD_BUF_SIZE); + + if (migrate_multifd()) { + read = ram_load_multifd_pages(host, size, + block->pages_offset + offset); + } else { + read = qemu_get_buffer_at(f, host, size, + block->pages_offset + offset); + } + + if (!read) { + goto err; + } + offset += read; + unread -= read; + } + } + + return true; + +err: + qemu_file_get_error_obj(f, errp); + error_prepend(errp, "(%s) failed to read page " RAM_ADDR_FMT + "from file offset %" PRIx64 ": ", block->idstr, offset, + block->pages_offset + offset); + return false; +} + +static void parse_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block, + ram_addr_t length, Error **errp) +{ + g_autofree unsigned long *bitmap = NULL; + MappedRamHeader header; + size_t bitmap_size; + long num_pages; + + if (!mapped_ram_read_header(f, &header, errp)) { + return; + } + + block->pages_offset = header.pages_offset; + + /* + * Check the alignment of the file region that contains pages. We + * don't enforce MAPPED_RAM_FILE_OFFSET_ALIGNMENT to allow that + * value to change in the future. Do only a sanity check with page + * size alignment. + */ + if (!QEMU_IS_ALIGNED(block->pages_offset, TARGET_PAGE_SIZE)) { + error_setg(errp, + "Error reading ramblock %s pages, region has bad alignment", + block->idstr); + return; + } + + num_pages = length / header.page_size; + bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + bitmap = g_malloc0(bitmap_size); + if (qemu_get_buffer_at(f, (uint8_t *)bitmap, bitmap_size, + header.bitmap_offset) != bitmap_size) { + error_setg(errp, "Error reading dirty bitmap"); + return; + } + + if (!read_ramblock_mapped_ram(f, block, num_pages, bitmap, errp)) { + return; + } + + /* Skip pages array */ + qemu_set_offset(f, block->pages_offset + length, SEEK_SET); + + return; +} + static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) { int ret = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = migration_incoming_postcopy_advised(); int max_hg_page_size; + Error *local_err = NULL; assert(block); + if (migrate_mapped_ram()) { + parse_ramblock_mapped_ram(f, block, length, &local_err); + if (local_err) { + error_report_err(local_err); + return -EINVAL; + } + return 0; + } + if (!qemu_ram_is_migratable(block)) { error_report("block %s should not be migrated !", block->idstr); return -EINVAL; } if (length != block->used_length) { - Error *local_err = NULL; - ret = qemu_ram_resize(block, length, &local_err); if (local_err) { error_report_err(local_err); @@ -3899,6 +4191,12 @@ static int ram_load_precopy(QEMUFile *f) invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; } + if (migrate_mapped_ram()) { + invalid_flags |= (RAM_SAVE_FLAG_HOOK | RAM_SAVE_FLAG_MULTIFD_FLUSH | + RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_XBZRLE | + RAM_SAVE_FLAG_ZERO); + } + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; void *host = NULL, *host_bak = NULL; @@ -3920,6 +4218,8 @@ static int ram_load_precopy(QEMUFile *f) addr &= TARGET_PAGE_MASK; if (flags & invalid_flags) { + error_report("Unexpected RAM flags: %d", flags & invalid_flags); + if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { error_report("Received an unexpected compressed page"); } @@ -3972,6 +4272,16 @@ static int ram_load_precopy(QEMUFile *f) switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_MEM_SIZE: ret = parse_ramblocks(f, addr); + /* + * For mapped-ram migration (to a file) using multifd, we sync + * once and for all here to make sure all tasks we queued to + * multifd threads are completed, so that all the ramblocks + * (including all the guest memory pages within) are fully + * loaded after this sync returns. + */ + if (migrate_mapped_ram()) { + multifd_recv_sync_main(); + } break; case RAM_SAVE_FLAG_ZERO: @@ -4012,7 +4322,12 @@ static int ram_load_precopy(QEMUFile *f) case RAM_SAVE_FLAG_EOS: /* normal exit */ if (migrate_multifd() && - migrate_multifd_flush_after_each_section()) { + migrate_multifd_flush_after_each_section() && + /* + * Mapped-ram migration flushes once and for all after + * parsing ramblocks. Always ignore EOS for it. + */ + !migrate_mapped_ram()) { multifd_recv_sync_main(); } break; |