diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-06-12 11:06:03 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-06-12 11:06:03 +0100 |
commit | a4ef02fd9b3d12b105b56942166c8364ade9be0f (patch) | |
tree | c4d824efda7fa533e3e6c32fa5e7a16cf72afc54 | |
parent | d8e3b729cf452d2689c8669f1ec18158db29fd5a (diff) | |
parent | 4fa3dd17dc29c316726f0d4a354a4d895e130c73 (diff) |
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20150612' into staging
migration/next for 20150612
# gpg: Signature made Fri Jun 12 05:56:21 2015 BST using RSA key ID 5872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg: aka "Juan Quintela <quintela@trasno.org>"
* remotes/juanquintela/tags/migration/20150612: (21 commits)
Remove unneeded memset
Rename RDMA structures to make destination clear
Teach analyze-migration.py about section footers
Add a protective section footer
Disable section footers on older machine types
Merge section header writing
Move loadvm_handlers into MigrationIncomingState
Move copy out of qemu_peek_buffer
Create MigrationIncomingState
qemu_ram_foreach_block: pass up error value, and down the ramblock name
Split header writing out of qemu_savevm_state_begin
Add qemu_get_counted_string to read a string prefixed by a count byte
migration: Use normal VMStateDescriptions for Subsections
migration: create savevm_state
migration: Remove duplicated assignment of SETUP status
rdma: Fix qemu crash when IPv6 address is used for migration
arch_init: Clean up the duplicate variable 'len' defining in ram_load()
migration: reduce include files
migration: Add myself to the copyright list of both files
migration: move savevm.c inside migration/
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
49 files changed, 2191 insertions, 2144 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 4ed82154ce..e728d3a1d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1014,8 +1014,6 @@ M: Amit Shah <amit.shah@redhat.com> S: Maintained F: include/migration/ F: migration/ -F: savevm.c -F: arch_init.c F: scripts/vmstate-static-checker.py F: tests/vmstate-static-checker-data/ diff --git a/Makefile.target b/Makefile.target index ec5b92cb60..3e7aafd72d 100644 --- a/Makefile.target +++ b/Makefile.target @@ -132,9 +132,10 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o bootdevice.o obj-y += hw/ obj-$(CONFIG_KVM) += kvm-all.o -obj-y += memory.o savevm.o cputlb.o +obj-y += memory.o cputlb.o obj-y += memory_mapping.o obj-y += dump.o +obj-y += migration/ram.o migration/savevm.o LIBS := $(libs_softmmu) $(LIBS) # xen support diff --git a/arch_init.c b/arch_init.c index d29447497b..725c638ece 100644 --- a/arch_init.c +++ b/arch_init.c @@ -22,46 +22,15 @@ * THE SOFTWARE. */ #include <stdint.h> -#include <stdarg.h> -#include <stdlib.h> -#include <zlib.h> -#ifndef _WIN32 -#include <sys/types.h> -#include <sys/mman.h> -#endif -#include "config.h" -#include "monitor/monitor.h" #include "sysemu/sysemu.h" -#include "qemu/bitops.h" -#include "qemu/bitmap.h" #include "sysemu/arch_init.h" -#include "audio/audio.h" -#include "hw/i386/pc.h" #include "hw/pci/pci.h" #include "hw/audio/audio.h" -#include "sysemu/kvm.h" -#include "migration/migration.h" #include "hw/i386/smbios.h" -#include "exec/address-spaces.h" -#include "hw/audio/pcspk.h" -#include "migration/page_cache.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qmp-commands.h" -#include "trace.h" -#include "exec/cpu-all.h" -#include "exec/ram_addr.h" #include "hw/acpi/acpi.h" -#include "qemu/host-utils.h" -#include "qemu/rcu_queue.h" - -#ifdef DEBUG_ARCH_INIT -#define DPRINTF(fmt, ...) \ - do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif #ifdef TARGET_SPARC int graphic_width = 1024; @@ -111,24 +80,6 @@ int graphic_depth = 32; #endif const uint32_t arch_type = QEMU_ARCH; -static bool mig_throttle_on; -static int dirty_rate_high_cnt; -static void check_guest_throttling(void); - -static uint64_t bitmap_sync_count; - -/***********************************************************/ -/* ram save/restore */ - -#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ -#define RAM_SAVE_FLAG_COMPRESS 0x02 -#define RAM_SAVE_FLAG_MEM_SIZE 0x04 -#define RAM_SAVE_FLAG_PAGE 0x08 -#define RAM_SAVE_FLAG_EOS 0x10 -#define RAM_SAVE_FLAG_CONTINUE 0x20 -#define RAM_SAVE_FLAG_XBZRLE 0x40 -/* 0x80 is reserved in migration.h start with 0x100 next */ -#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 static struct defconfig_file { const char *filename; @@ -139,8 +90,6 @@ static struct defconfig_file { { NULL }, /* end of list */ }; -static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; - int qemu_read_default_config_files(bool userconfig) { int ret; @@ -159,1517 +108,6 @@ int qemu_read_default_config_files(bool userconfig) return 0; } -static inline bool is_zero_range(uint8_t *p, uint64_t size) -{ - return buffer_find_nonzero_offset(p, size) == size; -} - -/* struct contains XBZRLE cache and a static page - used by the compression */ -static struct { - /* buffer used for XBZRLE encoding */ - uint8_t *encoded_buf; - /* buffer for storing page content */ - uint8_t *current_buf; - /* Cache for XBZRLE, Protected by lock. */ - PageCache *cache; - QemuMutex lock; -} XBZRLE; - -/* buffer used for XBZRLE decoding */ -static uint8_t *xbzrle_decoded_buf; - -static void XBZRLE_cache_lock(void) -{ - if (migrate_use_xbzrle()) - qemu_mutex_lock(&XBZRLE.lock); -} - -static void XBZRLE_cache_unlock(void) -{ - if (migrate_use_xbzrle()) - qemu_mutex_unlock(&XBZRLE.lock); -} - -/* - * called from qmp_migrate_set_cache_size in main thread, possibly while - * a migration is in progress. - * A running migration maybe using the cache and might finish during this - * call, hence changes to the cache are protected by XBZRLE.lock(). - */ -int64_t xbzrle_cache_resize(int64_t new_size) -{ - PageCache *new_cache; - int64_t ret; - - if (new_size < TARGET_PAGE_SIZE) { - return -1; - } - - XBZRLE_cache_lock(); - - if (XBZRLE.cache != NULL) { - if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { - goto out_new_size; - } - new_cache = cache_init(new_size / TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!new_cache) { - error_report("Error creating cache"); - ret = -1; - goto out; - } - - cache_fini(XBZRLE.cache); - XBZRLE.cache = new_cache; - } - -out_new_size: - ret = pow2floor(new_size); -out: - XBZRLE_cache_unlock(); - return ret; -} - -/* accounting for migration statistics */ -typedef struct AccountingInfo { - uint64_t dup_pages; - uint64_t skipped_pages; - uint64_t norm_pages; - uint64_t iterations; - uint64_t xbzrle_bytes; - uint64_t xbzrle_pages; - uint64_t xbzrle_cache_miss; - double xbzrle_cache_miss_rate; - uint64_t xbzrle_overflows; -} AccountingInfo; - -static AccountingInfo acct_info; - -static void acct_clear(void) -{ - memset(&acct_info, 0, sizeof(acct_info)); -} - -uint64_t dup_mig_bytes_transferred(void) -{ - return acct_info.dup_pages * TARGET_PAGE_SIZE; -} - -uint64_t dup_mig_pages_transferred(void) -{ - return acct_info.dup_pages; -} - -uint64_t skipped_mig_bytes_transferred(void) -{ - return acct_info.skipped_pages * TARGET_PAGE_SIZE; -} - -uint64_t skipped_mig_pages_transferred(void) -{ - return acct_info.skipped_pages; -} - -uint64_t norm_mig_bytes_transferred(void) -{ - return acct_info.norm_pages * TARGET_PAGE_SIZE; -} - -uint64_t norm_mig_pages_transferred(void) -{ - return acct_info.norm_pages; -} - -uint64_t xbzrle_mig_bytes_transferred(void) -{ - return acct_info.xbzrle_bytes; -} - -uint64_t xbzrle_mig_pages_transferred(void) -{ - return acct_info.xbzrle_pages; -} - -uint64_t xbzrle_mig_pages_cache_miss(void) -{ - return acct_info.xbzrle_cache_miss; -} - -double xbzrle_mig_cache_miss_rate(void) -{ - return acct_info.xbzrle_cache_miss_rate; -} - -uint64_t xbzrle_mig_pages_overflow(void) -{ - return acct_info.xbzrle_overflows; -} - -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static unsigned long *migration_bitmap; -static uint64_t migration_dirty_pages; -static uint32_t last_version; -static bool ram_bulk_stage; - -struct CompressParam { - bool start; - bool done; - QEMUFile *file; - QemuMutex mutex; - QemuCond cond; - RAMBlock *block; - ram_addr_t offset; -}; -typedef struct CompressParam CompressParam; - -struct DecompressParam { - bool start; - QemuMutex mutex; - QemuCond cond; - void *des; - uint8 *compbuf; - int len; -}; -typedef struct DecompressParam DecompressParam; - -static CompressParam *comp_param; -static QemuThread *compress_threads; -/* comp_done_cond is used to wake up the migration thread when - * one of the compression threads has finished the compression. - * comp_done_lock is used to co-work with comp_done_cond. - */ -static QemuMutex *comp_done_lock; -static QemuCond *comp_done_cond; -/* The empty QEMUFileOps will be used by file in CompressParam */ -static const QEMUFileOps empty_ops = { }; - -static bool compression_switch; -static bool quit_comp_thread; -static bool quit_decomp_thread; -static DecompressParam *decomp_param; -static QemuThread *decompress_threads; -static uint8_t *compressed_data_buf; - -static int do_compress_ram_page(CompressParam *param); - -static void *do_data_compress(void *opaque) -{ - CompressParam *param = opaque; - - while (!quit_comp_thread) { - qemu_mutex_lock(¶m->mutex); - /* Re-check the quit_comp_thread in case of - * terminate_compression_threads is called just before - * qemu_mutex_lock(¶m->mutex) and after - * while(!quit_comp_thread), re-check it here can make - * sure the compression thread terminate as expected. - */ - while (!param->start && !quit_comp_thread) { - qemu_cond_wait(¶m->cond, ¶m->mutex); - } - if (!quit_comp_thread) { - do_compress_ram_page(param); - } - param->start = false; - qemu_mutex_unlock(¶m->mutex); - - qemu_mutex_lock(comp_done_lock); - param->done = true; - qemu_cond_signal(comp_done_cond); - qemu_mutex_unlock(comp_done_lock); - } - - return NULL; -} - -static inline void terminate_compression_threads(void) -{ - int idx, thread_count; - - thread_count = migrate_compress_threads(); - quit_comp_thread = true; - for (idx = 0; idx < thread_count; idx++) { - qemu_mutex_lock(&comp_param[idx].mutex); - qemu_cond_signal(&comp_param[idx].cond); - qemu_mutex_unlock(&comp_param[idx].mutex); - } -} - -void migrate_compress_threads_join(void) -{ - int i, thread_count; - - if (!migrate_use_compression()) { - return; - } - terminate_compression_threads(); - thread_count = migrate_compress_threads(); - for (i = 0; i < thread_count; i++) { - qemu_thread_join(compress_threads + i); - qemu_fclose(comp_param[i].file); - qemu_mutex_destroy(&comp_param[i].mutex); - qemu_cond_destroy(&comp_param[i].cond); - } - qemu_mutex_destroy(comp_done_lock); - qemu_cond_destroy(comp_done_cond); - g_free(compress_threads); - g_free(comp_param); - g_free(comp_done_cond); - g_free(comp_done_lock); - compress_threads = NULL; - comp_param = NULL; - comp_done_cond = NULL; - comp_done_lock = NULL; -} - -void migrate_compress_threads_create(void) -{ - int i, thread_count; - - if (!migrate_use_compression()) { - return; - } - quit_comp_thread = false; - compression_switch = true; - thread_count = migrate_compress_threads(); - compress_threads = g_new0(QemuThread, thread_count); - comp_param = g_new0(CompressParam, thread_count); - comp_done_cond = g_new0(QemuCond, 1); - comp_done_lock = g_new0(QemuMutex, 1); - qemu_cond_init(comp_done_cond); - qemu_mutex_init(comp_done_lock); - for (i = 0; i < thread_count; i++) { - /* com_param[i].file is just used as a dummy buffer to save data, set - * it's ops to empty. - */ - comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); - comp_param[i].done = true; - qemu_mutex_init(&comp_param[i].mutex); - qemu_cond_init(&comp_param[i].cond); - qemu_thread_create(compress_threads + i, "compress", - do_data_compress, comp_param + i, - QEMU_THREAD_JOINABLE); - } -} - -/** - * save_page_header: Write page header to wire - * - * If this is the 1st block, it also writes the block identification - * - * Returns: Number of bytes written - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * in the lower bits, it contains flags - */ -static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) -{ - size_t size; - - qemu_put_be64(f, offset); - size = 8; - - if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, - strlen(block->idstr)); - size += 1 + strlen(block->idstr); - } - return size; -} - -/* Update the xbzrle cache to reflect a page that's been sent as all 0. - * The important thing is that a stale (not-yet-0'd) page be replaced - * by the new data. - * As a bonus, if the page wasn't in the cache it gets added so that - * when a small write is made into the 0'd page it gets XBZRLE sent - */ -static void xbzrle_cache_zero_page(ram_addr_t current_addr) -{ - if (ram_bulk_stage || !migrate_use_xbzrle()) { - return; - } - - /* We don't care if this fails to allocate a new cache page - * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, - bitmap_sync_count); -} - -#define ENCODING_FLAG_XBZRLE 0x1 - -/** - * save_xbzrle_page: compress and send current page - * - * Returns: 1 means that we wrote the page - * 0 means that page is identical to the one already sent - * -1 means that xbzrle would be longer than normal - * - * @f: QEMUFile where to send the data - * @current_data: - * @current_addr: - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, - ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, bool last_stage, - uint64_t *bytes_transferred) -{ - int encoded_len = 0, bytes_xbzrle; - uint8_t *prev_cached_page; - - if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { - acct_info.xbzrle_cache_miss++; - if (!last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data, - bitmap_sync_count) == -1) { - return -1; - } else { - /* update *current_data when the page has been - inserted into cache */ - *current_data = get_cached_data(XBZRLE.cache, current_addr); - } - } - return -1; - } - - prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); - - /* save current buffer into memory */ - memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); - - /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); - if (encoded_len == 0) { - DPRINTF("Skipping unmodified page\n"); - return 0; - } else if (encoded_len == -1) { - DPRINTF("Overflow\n"); - acct_info.xbzrle_overflows++; - /* update data in the cache */ - if (!last_stage) { - memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); - *current_data = prev_cached_page; - } - return -1; - } - - /* we need to update the data in the cache, in order to get the same data */ - if (!last_stage) { - memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); - } - - /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(f, encoded_len); - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); - bytes_xbzrle += encoded_len + 1 + 2; - acct_info.xbzrle_pages++; - acct_info.xbzrle_bytes += bytes_xbzrle; - *bytes_transferred += bytes_xbzrle; - - return 1; -} - -static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, - ram_addr_t start) -{ - unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; - unsigned long nr = base + (start >> TARGET_PAGE_BITS); - uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); - unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); - - unsigned long next; - - if (ram_bulk_stage && nr > base) { - next = nr + 1; - } else { - next = find_next_bit(migration_bitmap, size, nr); - } - - if (next < size) { - clear_bit(next, migration_bitmap); - migration_dirty_pages--; - } - return (next - base) << TARGET_PAGE_BITS; -} - -static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) -{ - migration_dirty_pages += - cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length); -} - - -/* Fix me: there are too many global variables used in migration process. */ -static int64_t start_time; -static int64_t bytes_xfer_prev; -static int64_t num_dirty_pages_period; -static uint64_t xbzrle_cache_miss_prev; -static uint64_t iterations_prev; - -static void migration_bitmap_sync_init(void) -{ - start_time = 0; - bytes_xfer_prev = 0; - num_dirty_pages_period = 0; - xbzrle_cache_miss_prev = 0; - iterations_prev = 0; -} - -/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ -static void migration_bitmap_sync(void) -{ - RAMBlock *block; - uint64_t num_dirty_pages_init = migration_dirty_pages; - MigrationState *s = migrate_get_current(); - int64_t end_time; - int64_t bytes_xfer_now; - - bitmap_sync_count++; - - if (!bytes_xfer_prev) { - bytes_xfer_prev = ram_bytes_transferred(); - } - - if (!start_time) { - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - } - - trace_migration_bitmap_sync_start(); - address_space_sync_dirty_bitmap(&address_space_memory); - - rcu_read_lock(); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); - } - rcu_read_unlock(); - - trace_migration_bitmap_sync_end(migration_dirty_pages - - num_dirty_pages_init); - num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; - end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - - /* more than 1 second = 1000 millisecons */ - if (end_time > start_time + 1000) { - if (migrate_auto_converge()) { - /* The following detection logic can be refined later. For now: - Check to see if the dirtied bytes is 50% more than the approx. - amount of bytes that just got transferred since the last time we - were in this routine. If that happens >N times (for now N==4) - we turn on the throttle down logic */ - bytes_xfer_now = ram_bytes_transferred(); - if (s->dirty_pages_rate && - (num_dirty_pages_period * TARGET_PAGE_SIZE > - (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ > 4)) { - trace_migration_throttle(); - mig_throttle_on = true; - dirty_rate_high_cnt = 0; - } - bytes_xfer_prev = bytes_xfer_now; - } else { - mig_throttle_on = false; - } - if (migrate_use_xbzrle()) { - if (iterations_prev != acct_info.iterations) { - acct_info.xbzrle_cache_miss_rate = - (double)(acct_info.xbzrle_cache_miss - - xbzrle_cache_miss_prev) / - (acct_info.iterations - iterations_prev); - } - iterations_prev = acct_info.iterations; - xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; - } - s->dirty_pages_rate = num_dirty_pages_period * 1000 - / (end_time - start_time); - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; - start_time = end_time; - num_dirty_pages_period = 0; - } - s->dirty_sync_count = bitmap_sync_count; -} - -/** - * save_zero_page: Send the zero page to the stream - * - * Returns: Number of pages written. - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @p: pointer to the page - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - uint8_t *p, uint64_t *bytes_transferred) -{ - int pages = -1; - - if (is_zero_range(p, TARGET_PAGE_SIZE)) { - acct_info.dup_pages++; - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, 0); - *bytes_transferred += 1; - pages = 1; - } - - return pages; -} - -/** - * ram_save_page: Send the given page to the stream - * - * Returns: Number of pages written. - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, - bool last_stage, uint64_t *bytes_transferred) -{ - int pages = -1; - uint64_t bytes_xmit; - ram_addr_t current_addr; - MemoryRegion *mr = block->mr; - uint8_t *p; - int ret; - bool send_async = true; - - p = memory_region_get_ram_ptr(mr) + offset; - - /* In doubt sent page as normal */ - bytes_xmit = 0; - ret = ram_control_save_page(f, block->offset, - offset, TARGET_PAGE_SIZE, &bytes_xmit); - if (bytes_xmit) { - *bytes_transferred += bytes_xmit; - pages = 1; - } - - XBZRLE_cache_lock(); - - current_addr = block->offset + offset; - - if (block == last_sent_block) { - offset |= RAM_SAVE_FLAG_CONTINUE; - } - if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { - if (ret != RAM_SAVE_CONTROL_DELAYED) { - if (bytes_xmit > 0) { - acct_info.norm_pages++; - } else if (bytes_xmit == 0) { - acct_info.dup_pages++; - } - } - } else { - pages = save_zero_page(f, block, offset, p, bytes_transferred); - if (pages > 0) { - /* Must let xbzrle know, otherwise a previous (now 0'd) cached - * page would be stale - */ - xbzrle_cache_zero_page(current_addr); - } else if (!ram_bulk_stage && migrate_use_xbzrle()) { - pages = save_xbzrle_page(f, &p, current_addr, block, - offset, last_stage, bytes_transferred); - if (!last_stage) { - /* Can't send this cached data async, since the cache page - * might get updated before it gets to the wire - */ - send_async = false; - } - } - } - - /* XBZRLE overflow or normal page */ - if (pages == -1) { - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_PAGE); - if (send_async) { - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); - } else { - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); - } - *bytes_transferred += TARGET_PAGE_SIZE; - pages = 1; - acct_info.norm_pages++; - } - - XBZRLE_cache_unlock(); - - return pages; -} - -static int do_compress_ram_page(CompressParam *param) -{ - int bytes_sent, blen; - uint8_t *p; - RAMBlock *block = param->block; - ram_addr_t offset = param->offset; - - p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK); - - bytes_sent = save_page_header(param->file, block, offset | - RAM_SAVE_FLAG_COMPRESS_PAGE); - blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE, - migrate_compress_level()); - bytes_sent += blen; - - return bytes_sent; -} - -static inline void start_compression(CompressParam *param) -{ - param->done = false; - qemu_mutex_lock(¶m->mutex); - param->start = true; - qemu_cond_signal(¶m->cond); - qemu_mutex_unlock(¶m->mutex); -} - -static inline void start_decompression(DecompressParam *param) -{ - qemu_mutex_lock(¶m->mutex); - param->start = true; - qemu_cond_signal(¶m->cond); - qemu_mutex_unlock(¶m->mutex); -} - -static uint64_t bytes_transferred; - -static void flush_compressed_data(QEMUFile *f) -{ - int idx, len, thread_count; - - if (!migrate_use_compression()) { - return; - } - thread_count = migrate_compress_threads(); - for (idx = 0; idx < thread_count; idx++) { - if (!comp_param[idx].done) { - qemu_mutex_lock(comp_done_lock); - while (!comp_param[idx].done && !quit_comp_thread) { - qemu_cond_wait(comp_done_cond, comp_done_lock); - } - qemu_mutex_unlock(comp_done_lock); - } - if (!quit_comp_thread) { - len = qemu_put_qemu_file(f, comp_param[idx].file); - bytes_transferred += len; - } - } -} - -static inline void set_compress_params(CompressParam *param, RAMBlock *block, - ram_addr_t offset) -{ - param->block = block; - param->offset = offset; -} - -static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, - ram_addr_t offset, - uint64_t *bytes_transferred) -{ - int idx, thread_count, bytes_xmit = -1, pages = -1; - - thread_count = migrate_compress_threads(); - qemu_mutex_lock(comp_done_lock); - while (true) { - for (idx = 0; idx < thread_count; idx++) { - if (comp_param[idx].done) { - bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file); - set_compress_params(&comp_param[idx], block, offset); - start_compression(&comp_param[idx]); - pages = 1; - acct_info.norm_pages++; - *bytes_transferred += bytes_xmit; - break; - } - } - if (pages > 0) { - break; - } else { - qemu_cond_wait(comp_done_cond, comp_done_lock); - } - } - qemu_mutex_unlock(comp_done_lock); - - return pages; -} - -/** - * ram_save_compressed_page: compress the given page and send it to the stream - * - * Returns: Number of pages written. - * - * @f: QEMUFile where to send the data - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ -static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, - ram_addr_t offset, bool last_stage, - uint64_t *bytes_transferred) -{ - int pages = -1; - uint64_t bytes_xmit; - MemoryRegion *mr = block->mr; - uint8_t *p; - int ret; - - p = memory_region_get_ram_ptr(mr) + offset; - - bytes_xmit = 0; - ret = ram_control_save_page(f, block->offset, - offset, TARGET_PAGE_SIZE, &bytes_xmit); - if (bytes_xmit) { - *bytes_transferred += bytes_xmit; - pages = 1; - } - if (block == last_sent_block) { - offset |= RAM_SAVE_FLAG_CONTINUE; - } - if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { - if (ret != RAM_SAVE_CONTROL_DELAYED) { - if (bytes_xmit > 0) { - acct_info.norm_pages++; - } else if (bytes_xmit == 0) { - acct_info.dup_pages++; - } - } - } else { - /* When starting the process of a new block, the first page of - * the block should be sent out before other pages in the same - * block, and all the pages in last block should have been sent - * out, keeping this order is important, because the 'cont' flag - * is used to avoid resending the block name. - */ - if (block != last_sent_block) { - flush_compressed_data(f); - pages = save_zero_page(f, block, offset, p, bytes_transferred); - if (pages == -1) { - set_compress_params(&comp_param[0], block, offset); - /* Use the qemu thread to compress the data to make sure the - * first page is sent out before other pages - */ - bytes_xmit = do_compress_ram_page(&comp_param[0]); - acct_info.norm_pages++; - qemu_put_qemu_file(f, comp_param[0].file); - *bytes_transferred += bytes_xmit; - pages = 1; - } - } else { - pages = save_zero_page(f, block, offset, p, bytes_transferred); - if (pages == -1) { - pages = compress_page_with_multi_thread(f, block, offset, - bytes_transferred); - } - } - } - - return pages; -} - -/** - * ram_find_and_save_block: Finds a dirty page and sends it to f - * - * Called within an RCU critical section. - * - * Returns: The number of pages written - * 0 means no dirty pages - * - * @f: QEMUFile where to send the data - * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - */ - -static int ram_find_and_save_block(QEMUFile *f, bool last_stage, - uint64_t *bytes_transferred) -{ - RAMBlock *block = last_seen_block; - ram_addr_t offset = last_offset; - bool complete_round = false; - int pages = 0; - MemoryRegion *mr; - - if (!block) - block = QLIST_FIRST_RCU(&ram_list.blocks); - - while (true) { - mr = block->mr; - offset = migration_bitmap_find_and_reset_dirty(mr, offset); - if (complete_round && block == last_seen_block && - offset >= last_offset) { - break; - } - if (offset >= block->used_length) { - offset = 0; - block = QLIST_NEXT_RCU(block, next); - if (!block) { - block = QLIST_FIRST_RCU(&ram_list.blocks); - complete_round = true; - ram_bulk_stage = false; - if (migrate_use_xbzrle()) { - /* If xbzrle is on, stop using the data compression at this - * point. In theory, xbzrle can do better than compression. - */ - flush_compressed_data(f); - compression_switch = false; - } - } - } else { - if (compression_switch && migrate_use_compression()) { - pages = ram_save_compressed_page(f, block, offset, last_stage, - bytes_transferred); - } else { - pages = ram_save_page(f, block, offset, last_stage, - bytes_transferred); - } - - /* if page is unmodified, continue to the next */ - if (pages > 0) { - last_sent_block = block; - break; - } - } - } - - last_seen_block = block; - last_offset = offset; - - return pages; -} - -void acct_update_position(QEMUFile *f, size_t size, bool zero) -{ - uint64_t pages = size / TARGET_PAGE_SIZE; - if (zero) { - acct_info.dup_pages += pages; - } else { - acct_info.norm_pages += pages; - bytes_transferred += size; - qemu_update_position(f, size); - } -} - -static ram_addr_t ram_save_remaining(void) -{ - return migration_dirty_pages; -} - -uint64_t ram_bytes_remaining(void) -{ - return ram_save_remaining() * TARGET_PAGE_SIZE; -} - -uint64_t ram_bytes_transferred(void) -{ - return bytes_transferred; -} - -uint64_t ram_bytes_total(void) -{ - RAMBlock *block; - uint64_t total = 0; - - rcu_read_lock(); - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) - total += block->used_length; - rcu_read_unlock(); - return total; -} - -void free_xbzrle_decoded_buf(void) -{ - g_free(xbzrle_decoded_buf); - xbzrle_decoded_buf = NULL; -} - -static void migration_end(void) -{ - if (migration_bitmap) { - memory_global_dirty_log_stop(); - g_free(migration_bitmap); - migration_bitmap = NULL; - } - - XBZRLE_cache_lock(); - if (XBZRLE.cache) { - cache_fini(XBZRLE.cache); - g_free(XBZRLE.encoded_buf); - g_free(XBZRLE.current_buf); - XBZRLE.cache = NULL; - XBZRLE.encoded_buf = NULL; - XBZRLE.current_buf = NULL; - } - XBZRLE_cache_unlock(); -} - -static void ram_migration_cancel(void *opaque) -{ - migration_end(); -} - -static void reset_ram_globals(void) -{ - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; - last_version = ram_list.version; - ram_bulk_stage = true; -} - -#define MAX_WAIT 50 /* ms, half buffered_file limit */ - - -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has - * long-running RCU critical section. When rcu-reclaims in the code - * start to become numerous it will be necessary to reduce the - * granularity of these critical sections. - */ - -static int ram_save_setup(QEMUFile *f, void *opaque) -{ - RAMBlock *block; - int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - - mig_throttle_on = false; - dirty_rate_high_cnt = 0; - bitmap_sync_count = 0; - migration_bitmap_sync_init(); - - if (migrate_use_xbzrle()) { - XBZRLE_cache_lock(); - XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / - TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE); - if (!XBZRLE.cache) { - XBZRLE_cache_unlock(); - error_report("Error creating cache"); - return -1; - } - XBZRLE_cache_unlock(); - - /* We prefer not to abort if there is no memory */ - XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); - if (!XBZRLE.encoded_buf) { - error_report("Error allocating encoded_buf"); - return -1; - } - - XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); - if (!XBZRLE.current_buf) { - error_report("Error allocating current_buf"); - g_free(XBZRLE.encoded_buf); - XBZRLE.encoded_buf = NULL; - return -1; - } - - acct_clear(); - } - - /* iothread lock needed for ram_list.dirty_memory[] */ - qemu_mutex_lock_iothread(); - qemu_mutex_lock_ramlist(); - rcu_read_lock(); - bytes_transferred = 0; - reset_ram_globals(); - - ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap, 0, ram_bitmap_pages); - - /* - * Count the total number of pages used by ram blocks not including any - * gaps due to alignment or unplugs. - */ - migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; - - memory_global_dirty_log_start(); - migration_bitmap_sync(); - qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); - - qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->used_length); - } - - rcu_read_unlock(); - - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - - return 0; -} - -static int ram_save_iterate(QEMUFile *f, void *opaque) -{ - int ret; - int i; - int64_t t0; - int pages_sent = 0; - - rcu_read_lock(); - if (ram_list.version != last_version) { - reset_ram_globals(); - } - - /* Read version before ram_list.blocks */ - smp_rmb(); - - ram_control_before_iterate(f, RAM_CONTROL_ROUND); - - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - i = 0; - while ((ret = qemu_file_rate_limit(f)) == 0) { - int pages; - - pages = ram_find_and_save_block(f, false, &bytes_transferred); - /* no more pages to sent */ - if (pages == 0) { - break; - } - pages_sent += pages; - acct_info.iterations++; - check_guest_throttling(); - /* we want to check in the 1st loop, just in case it was the 1st time - and we had to sync the dirty bitmap. - qemu_get_clock_ns() is a bit expensive, so we only check each some - iterations - */ - if ((i & 63) == 0) { - uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; - if (t1 > MAX_WAIT) { - DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", - t1, i); - break; - } - } - i++; - } - flush_compressed_data(f); - rcu_read_unlock(); - - /* - * Must occur before EOS (or any QEMUFile operation) - * because of RDMA protocol. - */ - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - bytes_transferred += 8; - - ret = qemu_file_get_error(f); - if (ret < 0) { - return ret; - } - - return pages_sent; -} - -/* Called with iothread lock */ -static int ram_save_complete(QEMUFile *f, void *opaque) -{ - rcu_read_lock(); - - migration_bitmap_sync(); - - ram_control_before_iterate(f, RAM_CONTROL_FINISH); - - /* try transferring iterative blocks of memory */ - - /* flush all remaining blocks regardless of rate limiting */ - while (true) { - int pages; - - pages = ram_find_and_save_block(f, true, &bytes_transferred); - /* no more blocks to sent */ - if (pages == 0) { - break; - } - } - - flush_compressed_data(f); - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - migration_end(); - - rcu_read_unlock(); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - - return 0; -} - -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) -{ - uint64_t remaining_size; - - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - - if (remaining_size < max_size) { - qemu_mutex_lock_iothread(); - rcu_read_lock(); - migration_bitmap_sync(); - rcu_read_unlock(); - qemu_mutex_unlock_iothread(); - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; - } - return remaining_size; -} - -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) -{ - unsigned int xh_len; - int xh_flags; - - if (!xbzrle_decoded_buf) { - xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE); - } - - /* extract RLE header */ - xh_flags = qemu_get_byte(f); - xh_len = qemu_get_be16(f); - - if (xh_flags != ENCODING_FLAG_XBZRLE) { - error_report("Failed to load XBZRLE page - wrong compression!"); - return -1; - } - - if (xh_len > TARGET_PAGE_SIZE) { - error_report("Failed to load XBZRLE page - len overflow!"); - return -1; - } - /* load data and decode */ - qemu_get_buffer(f, xbzrle_decoded_buf, xh_len); - - /* decode RLE */ - if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, - TARGET_PAGE_SIZE) == -1) { - error_report("Failed to load XBZRLE page - decode error!"); - return -1; - } - - return 0; -} - -/* Must be called from within a rcu critical section. - * Returns a pointer from within the RCU-protected ram_list. - */ -static inline void *host_from_stream_offset(QEMUFile *f, - ram_addr_t offset, - int flags) -{ - static RAMBlock *block = NULL; - char id[256]; - uint8_t len; - - if (flags & RAM_SAVE_FLAG_CONTINUE) { - if (!block || block->max_length <= offset) { - error_report("Ack, bad migration stream!"); - return NULL; - } - - return memory_region_get_ram_ptr(block->mr) + offset; - } - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id)) && - block->max_length > offset) { - return memory_region_get_ram_ptr(block->mr) + offset; - } - } - - error_report("Can't find block %s!", id); - return NULL; -} - -/* - * If a page (or a whole RDMA chunk) has been - * determined to be zero, then zap it. - */ -void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) -{ - if (ch != 0 || !is_zero_range(host, size)) { - memset(host, ch, size); - } -} - -static void *do_data_decompress(void *opaque) -{ - DecompressParam *param = opaque; - unsigned long pagesize; - - while (!quit_decomp_thread) { - qemu_mutex_lock(¶m->mutex); - while (!param->start && !quit_decomp_thread) { - qemu_cond_wait(¶m->cond, ¶m->mutex); - pagesize = TARGET_PAGE_SIZE; - if (!quit_decomp_thread) { - /* uncompress() will return failed in some case, especially - * when the page is dirted when doing the compression, it's - * not a problem because the dirty page will be retransferred - * and uncompress() won't break the data in other pages. - */ - uncompress((Bytef *)param->des, &pagesize, - (const Bytef *)param->compbuf, param->len); - } - param->start = false; - } - qemu_mutex_unlock(¶m->mutex); - } - - return NULL; -} - -void migrate_decompress_threads_create(void) -{ - int i, thread_count; - - thread_count = migrate_decompress_threads(); - decompress_threads = g_new0(QemuThread, thread_count); - decomp_param = g_new0(DecompressParam, thread_count); - compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); - quit_decomp_thread = false; - for (i = 0; i < thread_count; i++) { - qemu_mutex_init(&decomp_param[i].mutex); - qemu_cond_init(&decomp_param[i].cond); - decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); - qemu_thread_create(decompress_threads + i, "decompress", - do_data_decompress, decomp_param + i, - QEMU_THREAD_JOINABLE); - } -} - -void migrate_decompress_threads_join(void) -{ - int i, thread_count; - - quit_decomp_thread = true; - thread_count = migrate_decompress_threads(); - for (i = 0; i < thread_count; i++) { - qemu_mutex_lock(&decomp_param[i].mutex); - qemu_cond_signal(&decomp_param[i].cond); - qemu_mutex_unlock(&decomp_param[i].mutex); - } - for (i = 0; i < thread_count; i++) { - qemu_thread_join(decompress_threads + i); - qemu_mutex_destroy(&decomp_param[i].mutex); - qemu_cond_destroy(&decomp_param[i].cond); - g_free(decomp_param[i].compbuf); - } - g_free(decompress_threads); - g_free(decomp_param); - g_free(compressed_data_buf); - decompress_threads = NULL; - decomp_param = NULL; - compressed_data_buf = NULL; -} - -static void decompress_data_with_multi_threads(uint8_t *compbuf, - void *host, int len) -{ - int idx, thread_count; - - thread_count = migrate_decompress_threads(); - while (true) { - for (idx = 0; idx < thread_count; idx++) { - if (!decomp_param[idx].start) { - memcpy(decomp_param[idx].compbuf, compbuf, len); - decomp_param[idx].des = host; - decomp_param[idx].len = len; - start_decompression(&decomp_param[idx]); - break; - } - } - if (idx < thread_count) { - break; - } - } -} - -static int ram_load(QEMUFile *f, void *opaque, int version_id) -{ - int flags = 0, ret = 0; - static uint64_t seq_iter; - int len = 0; - - seq_iter++; - - if (version_id != 4) { - ret = -EINVAL; - } - - /* This RCU critical section can be very long running. - * When RCU reclaims in the code start to become numerous, - * it will be necessary to reduce the granularity of this - * critical section. - */ - rcu_read_lock(); - while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { - ram_addr_t addr, total_ram_bytes; - void *host; - uint8_t ch; - - addr = qemu_get_be64(f); - flags = addr & ~TARGET_PAGE_MASK; - addr &= TARGET_PAGE_MASK; - - switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { - case RAM_SAVE_FLAG_MEM_SIZE: - /* Synchronize RAM block list */ - total_ram_bytes = addr; - while (!ret && total_ram_bytes) { - RAMBlock *block; - uint8_t len; - char id[256]; - ram_addr_t length; - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - length = qemu_get_be64(f); - - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (length != block->used_length) { - Error *local_err = NULL; - - ret = qemu_ram_resize(block->offset, length, &local_err); - if (local_err) { - error_report_err(local_err); - } - } - break; - } - } - - if (!block) { - error_report("Unknown ramblock \"%s\", cannot " - "accept migration", id); - ret = -EINVAL; - } - - total_ram_bytes -= length; - } - break; - case RAM_SAVE_FLAG_COMPRESS: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - ch = qemu_get_byte(f); - ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); - break; - case RAM_SAVE_FLAG_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); - break; - case RAM_SAVE_FLAG_COMPRESS_PAGE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Invalid RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - - len = qemu_get_be32(f); - if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { - error_report("Invalid compressed data length: %d", len); - ret = -EINVAL; - break; - } - qemu_get_buffer(f, compressed_data_buf, len); - decompress_data_with_multi_threads(compressed_data_buf, host, len); - break; - case RAM_SAVE_FLAG_XBZRLE: - host = host_from_stream_offset(f, addr, flags); - if (!host) { - error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - if (load_xbzrle(f, addr, host) < 0) { - error_report("Failed to decompress XBZRLE page at " - RAM_ADDR_FMT, addr); - ret = -EINVAL; - break; - } - break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ - break; - default: - if (flags & RAM_SAVE_FLAG_HOOK) { - ram_control_load_hook(f, flags); - } else { - error_report("Unknown combination of migration flags: %#x", - flags); - ret = -EINVAL; - } - } - if (!ret) { - ret = qemu_file_get_error(f); - } - } - - rcu_read_unlock(); - DPRINTF("Completed load of VM with exit code %d seq iteration " - "%" PRIu64 "\n", ret, seq_iter); - return ret; -} - -static SaveVMHandlers savevm_ram_handlers = { - .save_live_setup = ram_save_setup, - .save_live_iterate = ram_save_iterate, - .save_live_complete = ram_save_complete, - .save_live_pending = ram_save_pending, - .load_state = ram_load, - .cancel = ram_migration_cancel, -}; - -void ram_mig_init(void) -{ - qemu_mutex_init(&XBZRLE.lock); - register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); -} - struct soundhw { const char *name; const char *descr; @@ -1869,52 +307,3 @@ TargetInfo *qmp_query_target(Error **errp) return info; } - -/* Stub function that's gets run on the vcpu when its brought out of the - VM to run inside qemu via async_run_on_cpu()*/ -static void mig_sleep_cpu(void *opq) -{ - qemu_mutex_unlock_iothread(); - g_usleep(30*1000); - qemu_mutex_lock_iothread(); -} - -/* To reduce the dirty rate explicitly disallow the VCPUs from spending - much time in the VM. The migration thread will try to catchup. - Workload will experience a performance drop. -*/ -static void mig_throttle_guest_down(void) -{ - CPUState *cpu; - - qemu_mutex_lock_iothread(); - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, mig_sleep_cpu, NULL); - } - qemu_mutex_unlock_iothread(); -} - -static void check_guest_throttling(void) -{ - static int64_t t0; - int64_t t1; - - if (!mig_throttle_on) { - return; - } - - if (!t0) { - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - return; - } - - t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - /* If it has been more than 40 ms since the last time the guest - * was throttled then do it again. - */ - if (40 < (t1-t0)/1000000) { - mig_throttle_guest_down(); - t0 = t1; - } -} @@ -480,6 +480,7 @@ static const VMStateDescription icount_vmstate_timers = { .name = "timer/icount", .version_id = 1, .minimum_version_id = 1, + .needed = icount_state_needed, .fields = (VMStateField[]) { VMSTATE_INT64(qemu_icount_bias, TimersState), VMSTATE_INT64(qemu_icount, TimersState), @@ -497,13 +498,9 @@ static const VMStateDescription vmstate_timers = { VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &icount_vmstate_timers, - .needed = icount_state_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &icount_vmstate_timers, + NULL } }; diff --git a/docs/migration.txt b/docs/migration.txt index 0492a4547a..f6df4beb2a 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -257,6 +257,7 @@ const VMStateDescription vmstate_ide_drive_pio_state = { .minimum_version_id = 1, .pre_save = ide_drive_pio_pre_save, .post_load = ide_drive_pio_post_load, + .needed = ide_drive_pio_state_needed, .fields = (VMStateField[]) { VMSTATE_INT32(req_nb_sectors, IDEState), VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1, @@ -279,13 +280,9 @@ const VMStateDescription vmstate_ide_drive = { .... several fields .... VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_ide_drive_pio_state, - .needed = ide_drive_pio_state_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_ide_drive_pio_state, + NULL } }; @@ -454,6 +454,7 @@ static const VMStateDescription vmstate_cpu_common_exception_index = { .name = "cpu_common/exception_index", .version_id = 1, .minimum_version_id = 1, + .needed = cpu_common_exception_index_needed, .fields = (VMStateField[]) { VMSTATE_INT32(exception_index, CPUState), VMSTATE_END_OF_LIST() @@ -471,13 +472,9 @@ const VMStateDescription vmstate_cpu_common = { VMSTATE_UINT32(interrupt_request, CPUState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_cpu_common_exception_index, - .needed = cpu_common_exception_index_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_cpu_common_exception_index, + NULL } }; @@ -3348,14 +3345,20 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr) return res; } -void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) +int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) { RAMBlock *block; + int ret = 0; rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - func(block->host, block->offset, block->used_length, opaque); + ret = func(block->idstr, block->host, block->offset, + block->used_length, opaque); + if (ret) { + break; + } } rcu_read_unlock(); + return ret; } #endif diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 25bc023882..8a64ffb38f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -152,6 +152,7 @@ static const VMStateDescription vmstate_memhp_state = { .version_id = 1, .minimum_version_id = 1, .minimum_version_id_old = 1, + .needed = vmstate_test_use_memhp, .fields = (VMStateField[]) { VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs), VMSTATE_END_OF_LIST() @@ -175,12 +176,9 @@ const VMStateDescription vmstate_ich9_pm = { VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_memhp_state, - .needed = vmstate_test_use_memhp, - }, - VMSTATE_END_OF_LIST() + .subsections = (const VMStateDescription*[]) { + &vmstate_memhp_state, + NULL } }; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index b730ca6ced..3bd1d5a865 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -260,6 +260,7 @@ static const VMStateDescription vmstate_memhp_state = { .version_id = 1, .minimum_version_id = 1, .minimum_version_id_old = 1, + .needed = vmstate_test_use_memhp, .fields = (VMStateField[]) { VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState), VMSTATE_END_OF_LIST() @@ -298,12 +299,9 @@ static const VMStateDescription vmstate_acpi = { vmstate_test_use_acpi_pci_hotplug), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_memhp_state, - .needed = vmstate_test_use_memhp, - }, - VMSTATE_END_OF_LIST() + .subsections = (const VMStateDescription*[]) { + &vmstate_memhp_state, + NULL } }; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 6e794597dc..5e1b67ee43 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -699,6 +699,7 @@ static const VMStateDescription vmstate_fdrive_media_changed = { .name = "fdrive/media_changed", .version_id = 1, .minimum_version_id = 1, + .needed = fdrive_media_changed_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(media_changed, FDrive), VMSTATE_END_OF_LIST() @@ -716,6 +717,7 @@ static const VMStateDescription vmstate_fdrive_media_rate = { .name = "fdrive/media_rate", .version_id = 1, .minimum_version_id = 1, + .needed = fdrive_media_rate_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(media_rate, FDrive), VMSTATE_END_OF_LIST() @@ -733,6 +735,7 @@ static const VMStateDescription vmstate_fdrive_perpendicular = { .name = "fdrive/perpendicular", .version_id = 1, .minimum_version_id = 1, + .needed = fdrive_perpendicular_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(perpendicular, FDrive), VMSTATE_END_OF_LIST() @@ -756,19 +759,11 @@ static const VMStateDescription vmstate_fdrive = { VMSTATE_UINT8(sect, FDrive), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_fdrive_media_changed, - .needed = &fdrive_media_changed_needed, - } , { - .vmsd = &vmstate_fdrive_media_rate, - .needed = &fdrive_media_rate_needed, - } , { - .vmsd = &vmstate_fdrive_perpendicular, - .needed = &fdrive_perpendicular_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_fdrive_media_changed, + &vmstate_fdrive_media_rate, + &vmstate_fdrive_perpendicular, + NULL } }; @@ -833,6 +828,7 @@ static const VMStateDescription vmstate_fdc_reset_sensei = { .name = "fdc/reset_sensei", .version_id = 1, .minimum_version_id = 1, + .needed = fdc_reset_sensei_needed, .fields = (VMStateField[]) { VMSTATE_INT32(reset_sensei, FDCtrl), VMSTATE_END_OF_LIST() @@ -850,6 +846,7 @@ static const VMStateDescription vmstate_fdc_result_timer = { .name = "fdc/result_timer", .version_id = 1, .minimum_version_id = 1, + .needed = fdc_result_timer_needed, .fields = (VMStateField[]) { VMSTATE_TIMER_PTR(result_timer, FDCtrl), VMSTATE_END_OF_LIST() @@ -867,6 +864,7 @@ static const VMStateDescription vmstate_fdc_phase = { .name = "fdc/phase", .version_id = 1, .minimum_version_id = 1, + .needed = fdc_phase_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(phase, FDCtrl), VMSTATE_END_OF_LIST() @@ -911,19 +909,11 @@ static const VMStateDescription vmstate_fdc = { vmstate_fdrive, FDrive), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_fdc_reset_sensei, - .needed = fdc_reset_sensei_needed, - } , { - .vmsd = &vmstate_fdc_result_timer, - .needed = fdc_result_timer_needed, - } , { - .vmsd = &vmstate_fdc_phase, - .needed = fdc_phase_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_fdc_reset_sensei, + &vmstate_fdc_result_timer, + &vmstate_fdc_phase, + NULL } }; diff --git a/hw/char/serial.c b/hw/char/serial.c index 55011cfd26..513d73c27f 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -662,6 +662,7 @@ static const VMStateDescription vmstate_serial_thr_ipending = { .name = "serial/thr_ipending", .version_id = 1, .minimum_version_id = 1, + .needed = serial_thr_ipending_needed, .fields = (VMStateField[]) { VMSTATE_INT32(thr_ipending, SerialState), VMSTATE_END_OF_LIST() @@ -678,6 +679,7 @@ static const VMStateDescription vmstate_serial_tsr = { .name = "serial/tsr", .version_id = 1, .minimum_version_id = 1, + .needed = serial_tsr_needed, .fields = (VMStateField[]) { VMSTATE_INT32(tsr_retry, SerialState), VMSTATE_UINT8(thr, SerialState), @@ -697,6 +699,7 @@ static const VMStateDescription vmstate_serial_recv_fifo = { .name = "serial/recv_fifo", .version_id = 1, .minimum_version_id = 1, + .needed = serial_recv_fifo_needed, .fields = (VMStateField[]) { VMSTATE_STRUCT(recv_fifo, SerialState, 1, vmstate_fifo8, Fifo8), VMSTATE_END_OF_LIST() @@ -713,6 +716,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { .name = "serial/xmit_fifo", .version_id = 1, .minimum_version_id = 1, + .needed = serial_xmit_fifo_needed, .fields = (VMStateField[]) { VMSTATE_STRUCT(xmit_fifo, SerialState, 1, vmstate_fifo8, Fifo8), VMSTATE_END_OF_LIST() @@ -729,6 +733,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { .name = "serial/fifo_timeout_timer", .version_id = 1, .minimum_version_id = 1, + .needed = serial_fifo_timeout_timer_needed, .fields = (VMStateField[]) { VMSTATE_TIMER_PTR(fifo_timeout_timer, SerialState), VMSTATE_END_OF_LIST() @@ -745,6 +750,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { .name = "serial/timeout_ipending", .version_id = 1, .minimum_version_id = 1, + .needed = serial_timeout_ipending_needed, .fields = (VMStateField[]) { VMSTATE_INT32(timeout_ipending, SerialState), VMSTATE_END_OF_LIST() @@ -760,6 +766,7 @@ static bool serial_poll_needed(void *opaque) static const VMStateDescription vmstate_serial_poll = { .name = "serial/poll", .version_id = 1, + .needed = serial_poll_needed, .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_INT32(poll_msl, SerialState), @@ -788,31 +795,15 @@ const VMStateDescription vmstate_serial = { VMSTATE_UINT8_V(fcr_vmstate, SerialState, 3), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_serial_thr_ipending, - .needed = &serial_thr_ipending_needed, - } , { - .vmsd = &vmstate_serial_tsr, - .needed = &serial_tsr_needed, - } , { - .vmsd = &vmstate_serial_recv_fifo, - .needed = &serial_recv_fifo_needed, - } , { - .vmsd = &vmstate_serial_xmit_fifo, - .needed = &serial_xmit_fifo_needed, - } , { - .vmsd = &vmstate_serial_fifo_timeout_timer, - .needed = &serial_fifo_timeout_timer_needed, - } , { - .vmsd = &vmstate_serial_timeout_ipending, - .needed = &serial_timeout_ipending_needed, - } , { - .vmsd = &vmstate_serial_poll, - .needed = &serial_poll_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_serial_thr_ipending, + &vmstate_serial_tsr, + &vmstate_serial_recv_fifo, + &vmstate_serial_xmit_fifo, + &vmstate_serial_fifo_timeout_timer, + &vmstate_serial_timeout_ipending, + &vmstate_serial_poll, + NULL } }; diff --git a/hw/display/qxl.c b/hw/display/qxl.c index b220e2d5d2..722146ec3a 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2220,6 +2220,7 @@ static VMStateDescription qxl_vmstate_monitors_config = { .name = "qxl/monitors-config", .version_id = 1, .minimum_version_id = 1, + .needed = qxl_monitors_config_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(guest_monitors_config, PCIQXLDevice), VMSTATE_END_OF_LIST() @@ -2253,13 +2254,9 @@ static VMStateDescription qxl_vmstate = { VMSTATE_UINT64(guest_cursor, PCIQXLDevice), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &qxl_vmstate_monitors_config, - .needed = qxl_monitors_config_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &qxl_vmstate_monitors_config, + NULL } }; diff --git a/hw/display/vga.c b/hw/display/vga.c index d1d296c74e..b35d523e65 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -2035,6 +2035,7 @@ static const VMStateDescription vmstate_vga_endian = { .name = "vga.endian", .version_id = 1, .minimum_version_id = 1, + .needed = vga_endian_state_needed, .fields = (VMStateField[]) { VMSTATE_BOOL(big_endian_fb, VGACommonState), VMSTATE_END_OF_LIST() @@ -2078,13 +2079,9 @@ const VMStateDescription vmstate_vga_common = { VMSTATE_UINT32(vbe_bank_mask, VGACommonState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_vga_endian, - .needed = vga_endian_state_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_vga_endian, + NULL } }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 5253e6d4fa..e142f75649 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -52,6 +52,7 @@ #ifdef CONFIG_XEN # include <xen/hvm/hvm_info_table.h> #endif +#include "migration/migration.h" #define MAX_IDE_BUS 2 @@ -305,6 +306,7 @@ static void pc_init1(MachineState *machine) static void pc_compat_2_3(MachineState *machine) { + savevm_skip_section_footers(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 110dfb78a8..b68263d231 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -45,6 +45,7 @@ #include "hw/usb.h" #include "hw/cpu/icc_bus.h" #include "qemu/error-report.h" +#include "migration/migration.h" /* ICH9 AHCI has 6 ports */ #define MAX_SATA_PORTS 6 @@ -289,6 +290,7 @@ static void pc_q35_init(MachineState *machine) static void pc_compat_2_3(MachineState *machine) { + savevm_skip_section_footers(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/ide/core.c b/hw/ide/core.c index fcb908061c..1efd98af63 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2561,6 +2561,7 @@ static const VMStateDescription vmstate_ide_atapi_gesn_state = { .name ="ide_drive/atapi/gesn_state", .version_id = 1, .minimum_version_id = 1, + .needed = ide_atapi_gesn_needed, .fields = (VMStateField[]) { VMSTATE_BOOL(events.new_media, IDEState), VMSTATE_BOOL(events.eject_request, IDEState), @@ -2572,6 +2573,7 @@ static const VMStateDescription vmstate_ide_tray_state = { .name = "ide_drive/tray_state", .version_id = 1, .minimum_version_id = 1, + .needed = ide_tray_state_needed, .fields = (VMStateField[]) { VMSTATE_BOOL(tray_open, IDEState), VMSTATE_BOOL(tray_locked, IDEState), @@ -2585,6 +2587,7 @@ static const VMStateDescription vmstate_ide_drive_pio_state = { .minimum_version_id = 1, .pre_save = ide_drive_pio_pre_save, .post_load = ide_drive_pio_post_load, + .needed = ide_drive_pio_state_needed, .fields = (VMStateField[]) { VMSTATE_INT32(req_nb_sectors, IDEState), VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1, @@ -2626,19 +2629,11 @@ const VMStateDescription vmstate_ide_drive = { VMSTATE_UINT8_V(cdrom_changed, IDEState, 3), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_ide_drive_pio_state, - .needed = ide_drive_pio_state_needed, - }, { - .vmsd = &vmstate_ide_tray_state, - .needed = ide_tray_state_needed, - }, { - .vmsd = &vmstate_ide_atapi_gesn_state, - .needed = ide_atapi_gesn_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_ide_drive_pio_state, + &vmstate_ide_tray_state, + &vmstate_ide_atapi_gesn_state, + NULL } }; @@ -2646,6 +2641,7 @@ static const VMStateDescription vmstate_ide_error_status = { .name ="ide_bus/error", .version_id = 2, .minimum_version_id = 1, + .needed = ide_error_needed, .fields = (VMStateField[]) { VMSTATE_INT32(error_status, IDEBus), VMSTATE_INT64_V(retry_sector_num, IDEBus, 2), @@ -2664,13 +2660,9 @@ const VMStateDescription vmstate_ide_bus = { VMSTATE_UINT8(unit, IDEBus), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_ide_error_status, - .needed = ide_error_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_ide_error_status, + NULL } }; diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 4b5e32dcbe..4afd0cfe8c 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -350,6 +350,7 @@ static const VMStateDescription vmstate_bmdma_current = { .name = "ide bmdma_current", .version_id = 1, .minimum_version_id = 1, + .needed = ide_bmdma_current_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(cur_addr, BMDMAState), VMSTATE_UINT32(cur_prd_last, BMDMAState), @@ -363,6 +364,7 @@ static const VMStateDescription vmstate_bmdma_status = { .name ="ide bmdma/status", .version_id = 1, .minimum_version_id = 1, + .needed = ide_bmdma_status_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(status, BMDMAState), VMSTATE_END_OF_LIST() @@ -383,16 +385,10 @@ static const VMStateDescription vmstate_bmdma = { VMSTATE_UINT8(migration_retry_unit, BMDMAState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_bmdma_current, - .needed = ide_bmdma_current_needed, - }, { - .vmsd = &vmstate_bmdma_status, - .needed = ide_bmdma_status_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_bmdma_current, + &vmstate_bmdma_status, + NULL } }; diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index 9b9a7d7a8a..ddac69df6f 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -391,23 +391,24 @@ static int kbd_outport_post_load(void *opaque, int version_id) return 0; } +static bool kbd_outport_needed(void *opaque) +{ + KBDState *s = opaque; + return s->outport != kbd_outport_default(s); +} + static const VMStateDescription vmstate_kbd_outport = { .name = "pckbd_outport", .version_id = 1, .minimum_version_id = 1, .post_load = kbd_outport_post_load, + .needed = kbd_outport_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(outport, KBDState), VMSTATE_END_OF_LIST() } }; -static bool kbd_outport_needed(void *opaque) -{ - KBDState *s = opaque; - return s->outport != kbd_outport_default(s); -} - static int kbd_post_load(void *opaque, int version_id) { KBDState *s = opaque; @@ -430,12 +431,9 @@ static const VMStateDescription vmstate_kbd = { VMSTATE_UINT8(pending, KBDState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_kbd_outport, - .needed = kbd_outport_needed, - }, - VMSTATE_END_OF_LIST() + .subsections = (const VMStateDescription*[]) { + &vmstate_kbd_outport, + NULL } }; diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 4baeea2b56..fdbe565e62 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -677,6 +677,7 @@ static const VMStateDescription vmstate_ps2_keyboard_ledstate = { .version_id = 3, .minimum_version_id = 2, .post_load = ps2_kbd_ledstate_post_load, + .needed = ps2_keyboard_ledstate_needed, .fields = (VMStateField[]) { VMSTATE_INT32(ledstate, PS2KbdState), VMSTATE_END_OF_LIST() @@ -717,13 +718,9 @@ static const VMStateDescription vmstate_ps2_keyboard = { VMSTATE_INT32_V(scancode_set, PS2KbdState,3), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_ps2_keyboard_ledstate, - .needed = ps2_keyboard_ledstate_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_ps2_keyboard_ledstate, + NULL } }; diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index d595d63a51..0032b97c5f 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -369,6 +369,7 @@ static const VMStateDescription vmstate_apic_common_sipi = { .name = "apic_sipi", .version_id = 1, .minimum_version_id = 1, + .needed = apic_common_sipi_needed, .fields = (VMStateField[]) { VMSTATE_INT32(sipi_vector, APICCommonState), VMSTATE_INT32(wait_for_sipi, APICCommonState), @@ -408,12 +409,9 @@ static const VMStateDescription vmstate_apic_common = { APICCommonState), /* open-coded timer state */ VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_apic_common_sipi, - .needed = apic_common_sipi_needed, - }, - VMSTATE_END_OF_LIST() + .subsections = (const VMStateDescription*[]) { + &vmstate_apic_common_sipi, + NULL } }; diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 71a9f7a716..b3e0b1fd52 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -650,6 +650,7 @@ static const VMStateDescription vmstate_ich9_rst_cnt = { .name = "ICH9LPC/rst_cnt", .version_id = 1, .minimum_version_id = 1, + .needed = ich9_rst_cnt_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(rst_cnt, ICH9LPCState), VMSTATE_END_OF_LIST() @@ -669,12 +670,9 @@ static const VMStateDescription vmstate_ich9_lpc = { VMSTATE_UINT32(sci_level, ICH9LPCState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_ich9_rst_cnt, - .needed = ich9_rst_cnt_needed - }, - { 0 } + .subsections = (const VMStateDescription*[]) { + &vmstate_ich9_rst_cnt, + NULL } }; diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 091d61acc3..bab8e2abfb 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1370,6 +1370,7 @@ static const VMStateDescription vmstate_e1000_mit_state = { .name = "e1000/mit_state", .version_id = 1, .minimum_version_id = 1, + .needed = e1000_mit_state_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(mac_reg[RDTR], E1000State), VMSTATE_UINT32(mac_reg[RADV], E1000State), @@ -1457,13 +1458,9 @@ static const VMStateDescription vmstate_e1000 = { VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_e1000_mit_state, - .needed = e1000_mit_state_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_e1000_mit_state, + NULL } }; diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index f868108dfe..e0db4727ae 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3240,6 +3240,7 @@ static const VMStateDescription vmstate_rtl8139_hotplug_ready ={ .name = "rtl8139/hotplug_ready", .version_id = 1, .minimum_version_id = 1, + .needed = rtl8139_hotplug_ready_needed, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() } @@ -3335,13 +3336,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_rtl8139_hotplug_ready, - .needed = rtl8139_hotplug_ready_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_rtl8139_hotplug_ready, + NULL } }; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index dfb328debd..8bcdf3ed77 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -2226,6 +2226,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = { .version_id = 1, .minimum_version_id = 1, .pre_load = vmxnet3_mcast_list_pre_load, + .needed = vmxnet3_mc_list_needed, .fields = (VMStateField[]) { VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0, mcast_list_buff_size), @@ -2470,14 +2471,9 @@ static const VMStateDescription vmstate_vmxnet3 = { VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmxstate_vmxnet3_mcast_list, - .needed = vmxnet3_mc_list_needed - }, - { - /* empty element. */ - } + .subsections = (const VMStateDescription*[]) { + &vmxstate_vmxnet3_mcast_list, + NULL } }; diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index f1712b86fe..ed2424c4cd 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -582,6 +582,7 @@ static const VMStateDescription vmstate_piix3_rcr = { .name = "PIIX3/rcr", .version_id = 1, .minimum_version_id = 1, + .needed = piix3_rcr_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(rcr, PIIX3State), VMSTATE_END_OF_LIST() @@ -600,12 +601,9 @@ static const VMStateDescription vmstate_piix3 = { PIIX_NUM_PIRQS, 3), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_piix3_rcr, - .needed = piix3_rcr_needed, - }, - { 0 } + .subsections = (const VMStateDescription*[]) { + &vmstate_piix3_rcr, + NULL } }; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index bd2c0e4caa..f50b2f08af 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1968,6 +1968,7 @@ static const VMStateDescription vmstate_scsi_sense_state = { .name = "SCSIDevice/sense", .version_id = 1, .minimum_version_id = 1, + .needed = scsi_sense_state_needed, .fields = (VMStateField[]) { VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice, SCSI_SENSE_BUF_SIZE_OLD, @@ -1998,13 +1999,9 @@ const VMStateDescription vmstate_scsi_device = { }, VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_scsi_sense_state, - .needed = scsi_sense_state_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_scsi_sense_state, + NULL } }; diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index b6b8a2063d..b50071ef93 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -283,6 +283,7 @@ static const VMStateDescription vmstate_hpet_rtc_irq_level = { .name = "hpet/rtc_irq_level", .version_id = 1, .minimum_version_id = 1, + .needed = hpet_rtc_irq_level_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(rtc_irq_level, HPETState), VMSTATE_END_OF_LIST() @@ -322,13 +323,9 @@ static const VMStateDescription vmstate_hpet = { vmstate_hpet_timer, HPETTimer), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_hpet_rtc_irq_level, - .needed = hpet_rtc_irq_level_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_hpet_rtc_irq_level, + NULL } }; diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index f2b77fa118..32048258c9 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -733,22 +733,23 @@ static int rtc_post_load(void *opaque, int version_id) return 0; } +static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) +{ + RTCState *s = (RTCState *)opaque; + return s->irq_reinject_on_ack_count != 0; +} + static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = { .name = "mc146818rtc/irq_reinject_on_ack_count", .version_id = 1, .minimum_version_id = 1, + .needed = rtc_irq_reinject_on_ack_count_needed, .fields = (VMStateField[]) { VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState), VMSTATE_END_OF_LIST() } }; -static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) -{ - RTCState *s = (RTCState *)opaque; - return s->irq_reinject_on_ack_count != 0; -} - static const VMStateDescription vmstate_rtc = { .name = "mc146818rtc", .version_id = 3, @@ -770,13 +771,9 @@ static const VMStateDescription vmstate_rtc = { VMSTATE_UINT64_V(next_alarm_time, RTCState, 3), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_rtc_irq_reinject_on_ack_count, - .needed = rtc_irq_reinject_on_ack_count_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_rtc_irq_reinject_on_ack_count, + NULL } }; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 1a22c9c0cb..7d65818064 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -2034,6 +2034,7 @@ static const VMStateDescription vmstate_ohci_eof_timer = { .version_id = 1, .minimum_version_id = 1, .pre_load = ohci_eof_timer_pre_load, + .needed = ohci_eof_timer_needed, .fields = (VMStateField[]) { VMSTATE_TIMER_PTR(eof_timer, OHCIState), VMSTATE_END_OF_LIST() @@ -2081,13 +2082,9 @@ static const VMStateDescription vmstate_ohci_state = { VMSTATE_BOOL(async_complete, OHCIState), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_ohci_eof_timer, - .needed = ohci_eof_timer_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_ohci_eof_timer, + NULL } }; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 242a654583..6b4218c037 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -2257,40 +2257,42 @@ static const VMStateInfo usbredir_ep_bufpq_vmstate_info = { /* For endp_data migration */ +static bool usbredir_bulk_receiving_needed(void *priv) +{ + struct endp_data *endp = priv; + + return endp->bulk_receiving_started; +} + static const VMStateDescription usbredir_bulk_receiving_vmstate = { .name = "usb-redir-ep/bulk-receiving", .version_id = 1, .minimum_version_id = 1, + .needed = usbredir_bulk_receiving_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(bulk_receiving_started, struct endp_data), VMSTATE_END_OF_LIST() } }; -static bool usbredir_bulk_receiving_needed(void *priv) +static bool usbredir_stream_needed(void *priv) { struct endp_data *endp = priv; - return endp->bulk_receiving_started; + return endp->max_streams; } static const VMStateDescription usbredir_stream_vmstate = { .name = "usb-redir-ep/stream-state", .version_id = 1, .minimum_version_id = 1, + .needed = usbredir_stream_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(max_streams, struct endp_data), VMSTATE_END_OF_LIST() } }; -static bool usbredir_stream_needed(void *priv) -{ - struct endp_data *endp = priv; - - return endp->max_streams; -} - static const VMStateDescription usbredir_ep_vmstate = { .name = "usb-redir-ep", .version_id = 1, @@ -2318,16 +2320,10 @@ static const VMStateDescription usbredir_ep_vmstate = { VMSTATE_INT32(bufpq_target_size, struct endp_data), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &usbredir_bulk_receiving_vmstate, - .needed = usbredir_bulk_receiving_needed, - }, { - .vmsd = &usbredir_stream_vmstate, - .needed = usbredir_stream_needed, - }, { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &usbredir_bulk_receiving_vmstate, + &usbredir_stream_vmstate, + NULL } }; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index fb49ffcb2d..ee4e07c5e7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1053,6 +1053,7 @@ static const VMStateDescription vmstate_virtio_device_endian = { .name = "virtio/device_endian", .version_id = 1, .minimum_version_id = 1, + .needed = &virtio_device_endian_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(device_endian, VirtIODevice), VMSTATE_END_OF_LIST() @@ -1063,6 +1064,7 @@ static const VMStateDescription vmstate_virtio_64bit_features = { .name = "virtio/64bit_features", .version_id = 1, .minimum_version_id = 1, + .needed = &virtio_64bit_features_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(guest_features, VirtIODevice), VMSTATE_END_OF_LIST() @@ -1077,16 +1079,10 @@ static const VMStateDescription vmstate_virtio = { .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_virtio_device_endian, - .needed = &virtio_device_endian_needed - }, - { - .vmsd = &vmstate_virtio_64bit_features, - .needed = &virtio_64bit_features_needed - }, - { 0 } + .subsections = (const VMStateDescription*[]) { + &vmstate_virtio_device_endian, + &vmstate_virtio_64bit_features, + NULL } }; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 43428bd030..de8a7200a9 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -126,10 +126,10 @@ void cpu_flush_icache_range(hwaddr start, int len); extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; -typedef void (RAMBlockIterFunc)(void *host_addr, +typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque); -void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); +int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); #endif diff --git a/include/migration/migration.h b/include/migration/migration.h index a6e025a248..9387c8c9d4 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -34,6 +34,7 @@ #define QEMU_VM_SECTION_FULL 0x04 #define QEMU_VM_SUBSECTION 0x05 #define QEMU_VM_VMDESCRIPTION 0x06 +#define QEMU_VM_SECTION_FOOTER 0x7e struct MigrationParams { bool blk; @@ -42,6 +43,20 @@ struct MigrationParams { typedef struct MigrationState MigrationState; +typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head; + +/* State for the incoming migration */ +struct MigrationIncomingState { + QEMUFile *file; + + /* See savevm.c */ + LoadStateEntry_Head loadvm_handlers; +}; + +MigrationIncomingState *migration_incoming_get_current(void); +MigrationIncomingState *migration_incoming_state_new(QEMUFile *f); +void migration_incoming_state_destroy(void); + struct MigrationState { int64_t bandwidth_limit; @@ -180,4 +195,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +void ram_mig_init(void); +void savevm_skip_section_footers(void); #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index a01c5b817e..4f67d79227 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -157,7 +157,7 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v) void qemu_put_be16(QEMUFile *f, unsigned int v); void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); -int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset); +int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset); int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size); ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, int level); @@ -312,4 +312,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) { qemu_get_be64s(f, (uint64_t *)pv); } + +size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); + #endif diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index bc7616aaa8..7153b1e145 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -120,11 +120,6 @@ typedef struct { bool (*field_exists)(void *opaque, int version_id); } VMStateField; -typedef struct VMStateSubsection { - const VMStateDescription *vmsd; - bool (*needed)(void *opaque); -} VMStateSubsection; - struct VMStateDescription { const char *name; int unmigratable; @@ -135,8 +130,9 @@ struct VMStateDescription { int (*pre_load)(void *opaque); int (*post_load)(void *opaque, int version_id); void (*pre_save)(void *opaque); + bool (*needed)(void *opaque); VMStateField *fields; - const VMStateSubsection *subsections; + const VMStateDescription **subsections; }; extern const VMStateDescription vmstate_dummy; @@ -812,6 +808,8 @@ extern const VMStateInfo vmstate_info_bitmap; #define SELF_ANNOUNCE_ROUNDS 5 +void loadvm_free_handlers(MigrationIncomingState *mis); + int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, int version_id); void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index cde3314896..6fdcbcd524 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -31,6 +31,7 @@ typedef struct I2CBus I2CBus; typedef struct I2SCodec I2SCodec; typedef struct ISABus ISABus; typedef struct ISADevice ISADevice; +typedef struct LoadStateEntry LoadStateEntry; typedef struct MACAddr MACAddr; typedef struct MachineClass MachineClass; typedef struct MachineState MachineState; @@ -38,6 +39,7 @@ typedef struct MemoryListener MemoryListener; typedef struct MemoryMappingList MemoryMappingList; typedef struct MemoryRegion MemoryRegion; typedef struct MemoryRegionSection MemoryRegionSection; +typedef struct MigrationIncomingState MigrationIncomingState; typedef struct MigrationParams MigrationParams; typedef struct Monitor Monitor; typedef struct MouseTransformInfo MouseTransformInfo; diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index 54b36c16c4..c38892fec6 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -30,7 +30,6 @@ extern const uint32_t arch_type; void select_soundhw(const char *optarg); void do_acpitable_option(const QemuOpts *opts); void do_smbios_option(QemuOpts *opts); -void ram_mig_init(void); void cpudef_init(void); void audio_init(void); int kvm_available(void); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 853d90a317..ef793f702e 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -84,6 +84,7 @@ void qemu_announce_self(void); bool qemu_savevm_state_blocked(Error **errp); void qemu_savevm_state_begin(QEMUFile *f, const MigrationParams *params); +void qemu_savevm_state_header(QEMUFile *f); int qemu_savevm_state_iterate(QEMUFile *f); void qemu_savevm_state_complete(QEMUFile *f); void qemu_savevm_state_cancel(void); diff --git a/migration/migration.c b/migration/migration.c index 732d229708..b04b4571a8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -53,6 +53,7 @@ static bool deferred_incoming; migrations at once. For now we don't need to add dynamic creation of migration */ +/* For outgoing */ MigrationState *migrate_get_current(void) { static MigrationState current_migration = { @@ -71,6 +72,30 @@ MigrationState *migrate_get_current(void) return ¤t_migration; } +/* For incoming */ +static MigrationIncomingState *mis_current; + +MigrationIncomingState *migration_incoming_get_current(void) +{ + return mis_current; +} + +MigrationIncomingState *migration_incoming_state_new(QEMUFile* f) +{ + mis_current = g_malloc0(sizeof(MigrationIncomingState)); + mis_current->file = f; + QLIST_INIT(&mis_current->loadvm_handlers); + + return mis_current; +} + +void migration_incoming_state_destroy(void) +{ + loadvm_free_handlers(mis_current); + g_free(mis_current); + mis_current = NULL; +} + /* * Called on -incoming with a defer: uri. * The migration can be started later after any parameters have been @@ -115,9 +140,14 @@ static void process_incoming_migration_co(void *opaque) Error *local_err = NULL; int ret; + migration_incoming_state_new(f); + ret = qemu_loadvm_state(f); + qemu_fclose(f); free_xbzrle_decoded_buf(); + migration_incoming_state_destroy(); + if (ret < 0) { error_report("load of migration failed: %s", strerror(-ret)); migrate_decompress_threads_join(); @@ -738,6 +768,7 @@ static void *migration_thread(void *opaque) int64_t start_time = initial_time; bool old_vm_running = false; + qemu_savevm_state_header(s->file); qemu_savevm_state_begin(s->file, &s->params); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; @@ -838,9 +869,6 @@ static void *migration_thread(void *opaque) void migrate_fd_connect(MigrationState *s) { - s->state = MIGRATION_STATUS_SETUP; - trace_migrate_set_state(MIGRATION_STATUS_SETUP); - /* This is a best 1st approximation. ns to ms */ s->expected_downtime = max_downtime/1000000; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 2750365a7e..965a757772 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -349,14 +349,14 @@ void qemu_file_skip(QEMUFile *f, int size) } /* - * Read 'size' bytes from file (at 'offset') into buf without moving the - * pointer. + * Read 'size' bytes from file (at 'offset') without moving the + * pointer and set 'buf' to point to that data. * * It will return size bytes unless there was an error, in which case it will * return as many as it managed to read (assuming blocking fd's which * all current QEMUFile are) */ -int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) +int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset) { int pending; int index; @@ -392,7 +392,7 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) size = pending; } - memcpy(buf, f->buf + index, size); + *buf = f->buf + index; return size; } @@ -411,11 +411,13 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size) while (pending > 0) { int res; + uint8_t *src; - res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0); + res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0); if (res == 0) { return done; } + memcpy(buf, src, res); qemu_file_skip(f, res); buf += res; pending -= res; @@ -585,3 +587,20 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) } return len; } + +/* + * Get a string whose length is determined by a single preceding byte + * A preallocated 256 byte buffer must be passed in. + * Returns: len on success and a 0 terminated string in the buffer + * else 0 + * (Note a 0 length string will return 0 either way) + */ +size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) +{ + size_t len = qemu_get_byte(f); + size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); + + buf[res] = 0; + + return res == len ? res : 0; +} diff --git a/migration/ram.c b/migration/ram.c new file mode 100644 index 0000000000..57368e1575 --- /dev/null +++ b/migration/ram.c @@ -0,0 +1,1628 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2011-2015 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <stdint.h> +#include <zlib.h> +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "qemu/timer.h" +#include "qemu/main-loop.h" +#include "migration/migration.h" +#include "exec/address-spaces.h" +#include "migration/page_cache.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "exec/ram_addr.h" +#include "qemu/rcu_queue.h" + +#ifdef DEBUG_MIGRATION_RAM +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +static bool mig_throttle_on; +static int dirty_rate_high_cnt; +static void check_guest_throttling(void); + +static uint64_t bitmap_sync_count; + +/***********************************************************/ +/* ram save/restore */ + +#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ +#define RAM_SAVE_FLAG_COMPRESS 0x02 +#define RAM_SAVE_FLAG_MEM_SIZE 0x04 +#define RAM_SAVE_FLAG_PAGE 0x08 +#define RAM_SAVE_FLAG_EOS 0x10 +#define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 +/* 0x80 is reserved in migration.h start with 0x100 next */ +#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 + +static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; + +static inline bool is_zero_range(uint8_t *p, uint64_t size) +{ + return buffer_find_nonzero_offset(p, size) == size; +} + +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* buffer for storing page content */ + uint8_t *current_buf; + /* Cache for XBZRLE, Protected by lock. */ + PageCache *cache; + QemuMutex lock; +} XBZRLE; + +/* buffer used for XBZRLE decoding */ +static uint8_t *xbzrle_decoded_buf; + +static void XBZRLE_cache_lock(void) +{ + if (migrate_use_xbzrle()) + qemu_mutex_lock(&XBZRLE.lock); +} + +static void XBZRLE_cache_unlock(void) +{ + if (migrate_use_xbzrle()) + qemu_mutex_unlock(&XBZRLE.lock); +} + +/* + * called from qmp_migrate_set_cache_size in main thread, possibly while + * a migration is in progress. + * A running migration maybe using the cache and might finish during this + * call, hence changes to the cache are protected by XBZRLE.lock(). + */ +int64_t xbzrle_cache_resize(int64_t new_size) +{ + PageCache *new_cache; + int64_t ret; + + if (new_size < TARGET_PAGE_SIZE) { + return -1; + } + + XBZRLE_cache_lock(); + + if (XBZRLE.cache != NULL) { + if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { + goto out_new_size; + } + new_cache = cache_init(new_size / TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!new_cache) { + error_report("Error creating cache"); + ret = -1; + goto out; + } + + cache_fini(XBZRLE.cache); + XBZRLE.cache = new_cache; + } + +out_new_size: + ret = pow2floor(new_size); +out: + XBZRLE_cache_unlock(); + return ret; +} + +/* accounting for migration statistics */ +typedef struct AccountingInfo { + uint64_t dup_pages; + uint64_t skipped_pages; + uint64_t norm_pages; + uint64_t iterations; + uint64_t xbzrle_bytes; + uint64_t xbzrle_pages; + uint64_t xbzrle_cache_miss; + double xbzrle_cache_miss_rate; + uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ + memset(&acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ + return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ + return acct_info.dup_pages; +} + +uint64_t skipped_mig_bytes_transferred(void) +{ + return acct_info.skipped_pages * TARGET_PAGE_SIZE; +} + +uint64_t skipped_mig_pages_transferred(void) +{ + return acct_info.skipped_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ + return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ + return acct_info.norm_pages; +} + +uint64_t xbzrle_mig_bytes_transferred(void) +{ + return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ + return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ + return acct_info.xbzrle_cache_miss; +} + +double xbzrle_mig_cache_miss_rate(void) +{ + return acct_info.xbzrle_cache_miss_rate; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ + return acct_info.xbzrle_overflows; +} + +/* This is the last block that we have visited serching for dirty pages + */ +static RAMBlock *last_seen_block; +/* This is the last block from where we have sent data */ +static RAMBlock *last_sent_block; +static ram_addr_t last_offset; +static unsigned long *migration_bitmap; +static uint64_t migration_dirty_pages; +static uint32_t last_version; +static bool ram_bulk_stage; + +struct CompressParam { + bool start; + bool done; + QEMUFile *file; + QemuMutex mutex; + QemuCond cond; + RAMBlock *block; + ram_addr_t offset; +}; +typedef struct CompressParam CompressParam; + +struct DecompressParam { + bool start; + QemuMutex mutex; + QemuCond cond; + void *des; + uint8 *compbuf; + int len; +}; +typedef struct DecompressParam DecompressParam; + +static CompressParam *comp_param; +static QemuThread *compress_threads; +/* comp_done_cond is used to wake up the migration thread when + * one of the compression threads has finished the compression. + * comp_done_lock is used to co-work with comp_done_cond. + */ +static QemuMutex *comp_done_lock; +static QemuCond *comp_done_cond; +/* The empty QEMUFileOps will be used by file in CompressParam */ +static const QEMUFileOps empty_ops = { }; + +static bool compression_switch; +static bool quit_comp_thread; +static bool quit_decomp_thread; +static DecompressParam *decomp_param; +static QemuThread *decompress_threads; +static uint8_t *compressed_data_buf; + +static int do_compress_ram_page(CompressParam *param); + +static void *do_data_compress(void *opaque) +{ + CompressParam *param = opaque; + + while (!quit_comp_thread) { + qemu_mutex_lock(¶m->mutex); + /* Re-check the quit_comp_thread in case of + * terminate_compression_threads is called just before + * qemu_mutex_lock(¶m->mutex) and after + * while(!quit_comp_thread), re-check it here can make + * sure the compression thread terminate as expected. + */ + while (!param->start && !quit_comp_thread) { + qemu_cond_wait(¶m->cond, ¶m->mutex); + } + if (!quit_comp_thread) { + do_compress_ram_page(param); + } + param->start = false; + qemu_mutex_unlock(¶m->mutex); + + qemu_mutex_lock(comp_done_lock); + param->done = true; + qemu_cond_signal(comp_done_cond); + qemu_mutex_unlock(comp_done_lock); + } + + return NULL; +} + +static inline void terminate_compression_threads(void) +{ + int idx, thread_count; + + thread_count = migrate_compress_threads(); + quit_comp_thread = true; + for (idx = 0; idx < thread_count; idx++) { + qemu_mutex_lock(&comp_param[idx].mutex); + qemu_cond_signal(&comp_param[idx].cond); + qemu_mutex_unlock(&comp_param[idx].mutex); + } +} + +void migrate_compress_threads_join(void) +{ + int i, thread_count; + + if (!migrate_use_compression()) { + return; + } + terminate_compression_threads(); + thread_count = migrate_compress_threads(); + for (i = 0; i < thread_count; i++) { + qemu_thread_join(compress_threads + i); + qemu_fclose(comp_param[i].file); + qemu_mutex_destroy(&comp_param[i].mutex); + qemu_cond_destroy(&comp_param[i].cond); + } + qemu_mutex_destroy(comp_done_lock); + qemu_cond_destroy(comp_done_cond); + g_free(compress_threads); + g_free(comp_param); + g_free(comp_done_cond); + g_free(comp_done_lock); + compress_threads = NULL; + comp_param = NULL; + comp_done_cond = NULL; + comp_done_lock = NULL; +} + +void migrate_compress_threads_create(void) +{ + int i, thread_count; + + if (!migrate_use_compression()) { + return; + } + quit_comp_thread = false; + compression_switch = true; + thread_count = migrate_compress_threads(); + compress_threads = g_new0(QemuThread, thread_count); + comp_param = g_new0(CompressParam, thread_count); + comp_done_cond = g_new0(QemuCond, 1); + comp_done_lock = g_new0(QemuMutex, 1); + qemu_cond_init(comp_done_cond); + qemu_mutex_init(comp_done_lock); + for (i = 0; i < thread_count; i++) { + /* com_param[i].file is just used as a dummy buffer to save data, set + * it's ops to empty. + */ + comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); + comp_param[i].done = true; + qemu_mutex_init(&comp_param[i].mutex); + qemu_cond_init(&comp_param[i].cond); + qemu_thread_create(compress_threads + i, "compress", + do_data_compress, comp_param + i, + QEMU_THREAD_JOINABLE); + } +} + +/** + * save_page_header: Write page header to wire + * + * If this is the 1st block, it also writes the block identification + * + * Returns: Number of bytes written + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * in the lower bits, it contains flags + */ +static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) +{ + size_t size; + + qemu_put_be64(f, offset); + size = 8; + + if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, + strlen(block->idstr)); + size += 1 + strlen(block->idstr); + } + return size; +} + +/* Update the xbzrle cache to reflect a page that's been sent as all 0. + * The important thing is that a stale (not-yet-0'd) page be replaced + * by the new data. + * As a bonus, if the page wasn't in the cache it gets added so that + * when a small write is made into the 0'd page it gets XBZRLE sent + */ +static void xbzrle_cache_zero_page(ram_addr_t current_addr) +{ + if (ram_bulk_stage || !migrate_use_xbzrle()) { + return; + } + + /* We don't care if this fails to allocate a new cache page + * as long as it updated an old one */ + cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, + bitmap_sync_count); +} + +#define ENCODING_FLAG_XBZRLE 0x1 + +/** + * save_xbzrle_page: compress and send current page + * + * Returns: 1 means that we wrote the page + * 0 means that page is identical to the one already sent + * -1 means that xbzrle would be longer than normal + * + * @f: QEMUFile where to send the data + * @current_data: + * @current_addr: + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, bool last_stage, + uint64_t *bytes_transferred) +{ + int encoded_len = 0, bytes_xbzrle; + uint8_t *prev_cached_page; + + if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { + acct_info.xbzrle_cache_miss++; + if (!last_stage) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, + bitmap_sync_count) == -1) { + return -1; + } else { + /* update *current_data when the page has been + inserted into cache */ + *current_data = get_cached_data(XBZRLE.cache, current_addr); + } + } + return -1; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* save current buffer into memory */ + memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + DPRINTF("Skipping unmodified page\n"); + return 0; + } else if (encoded_len == -1) { + DPRINTF("Overflow\n"); + acct_info.xbzrle_overflows++; + /* update data in the cache */ + if (!last_stage) { + memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); + *current_data = prev_cached_page; + } + return -1; + } + + /* we need to update the data in the cache, in order to get the same data */ + if (!last_stage) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + } + + /* Send XBZRLE based compressed page */ + bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(f, encoded_len); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_xbzrle += encoded_len + 1 + 2; + acct_info.xbzrle_pages++; + acct_info.xbzrle_bytes += bytes_xbzrle; + *bytes_transferred += bytes_xbzrle; + + return 1; +} + +static inline +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, + ram_addr_t start) +{ + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; + unsigned long nr = base + (start >> TARGET_PAGE_BITS); + uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); + unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); + + unsigned long next; + + if (ram_bulk_stage && nr > base) { + next = nr + 1; + } else { + next = find_next_bit(migration_bitmap, size, nr); + } + + if (next < size) { + clear_bit(next, migration_bitmap); + migration_dirty_pages--; + } + return (next - base) << TARGET_PAGE_BITS; +} + +static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) +{ + migration_dirty_pages += + cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length); +} + + +/* Fix me: there are too many global variables used in migration process. */ +static int64_t start_time; +static int64_t bytes_xfer_prev; +static int64_t num_dirty_pages_period; +static uint64_t xbzrle_cache_miss_prev; +static uint64_t iterations_prev; + +static void migration_bitmap_sync_init(void) +{ + start_time = 0; + bytes_xfer_prev = 0; + num_dirty_pages_period = 0; + xbzrle_cache_miss_prev = 0; + iterations_prev = 0; +} + +/* Called with iothread lock held, to protect ram_list.dirty_memory[] */ +static void migration_bitmap_sync(void) +{ + RAMBlock *block; + uint64_t num_dirty_pages_init = migration_dirty_pages; + MigrationState *s = migrate_get_current(); + int64_t end_time; + int64_t bytes_xfer_now; + + bitmap_sync_count++; + + if (!bytes_xfer_prev) { + bytes_xfer_prev = ram_bytes_transferred(); + } + + if (!start_time) { + start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + } + + trace_migration_bitmap_sync_start(); + address_space_sync_dirty_bitmap(&address_space_memory); + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); + } + rcu_read_unlock(); + + trace_migration_bitmap_sync_end(migration_dirty_pages + - num_dirty_pages_init); + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; + end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + /* more than 1 second = 1000 millisecons */ + if (end_time > start_time + 1000) { + if (migrate_auto_converge()) { + /* The following detection logic can be refined later. For now: + Check to see if the dirtied bytes is 50% more than the approx. + amount of bytes that just got transferred since the last time we + were in this routine. If that happens >N times (for now N==4) + we turn on the throttle down logic */ + bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && + (num_dirty_pages_period * TARGET_PAGE_SIZE > + (bytes_xfer_now - bytes_xfer_prev)/2) && + (dirty_rate_high_cnt++ > 4)) { + trace_migration_throttle(); + mig_throttle_on = true; + dirty_rate_high_cnt = 0; + } + bytes_xfer_prev = bytes_xfer_now; + } else { + mig_throttle_on = false; + } + if (migrate_use_xbzrle()) { + if (iterations_prev != acct_info.iterations) { + acct_info.xbzrle_cache_miss_rate = + (double)(acct_info.xbzrle_cache_miss - + xbzrle_cache_miss_prev) / + (acct_info.iterations - iterations_prev); + } + iterations_prev = acct_info.iterations; + xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; + } + s->dirty_pages_rate = num_dirty_pages_period * 1000 + / (end_time - start_time); + s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; + start_time = end_time; + num_dirty_pages_period = 0; + } + s->dirty_sync_count = bitmap_sync_count; +} + +/** + * save_zero_page: Send the zero page to the stream + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @p: pointer to the page + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, + uint8_t *p, uint64_t *bytes_transferred) +{ + int pages = -1; + + if (is_zero_range(p, TARGET_PAGE_SIZE)) { + acct_info.dup_pages++; + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(f, 0); + *bytes_transferred += 1; + pages = 1; + } + + return pages; +} + +/** + * ram_save_page: Send the given page to the stream + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, + bool last_stage, uint64_t *bytes_transferred) +{ + int pages = -1; + uint64_t bytes_xmit; + ram_addr_t current_addr; + MemoryRegion *mr = block->mr; + uint8_t *p; + int ret; + bool send_async = true; + + p = memory_region_get_ram_ptr(mr) + offset; + + /* In doubt sent page as normal */ + bytes_xmit = 0; + ret = ram_control_save_page(f, block->offset, + offset, TARGET_PAGE_SIZE, &bytes_xmit); + if (bytes_xmit) { + *bytes_transferred += bytes_xmit; + pages = 1; + } + + XBZRLE_cache_lock(); + + current_addr = block->offset + offset; + + if (block == last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { + if (ret != RAM_SAVE_CONTROL_DELAYED) { + if (bytes_xmit > 0) { + acct_info.norm_pages++; + } else if (bytes_xmit == 0) { + acct_info.dup_pages++; + } + } + } else { + pages = save_zero_page(f, block, offset, p, bytes_transferred); + if (pages > 0) { + /* Must let xbzrle know, otherwise a previous (now 0'd) cached + * page would be stale + */ + xbzrle_cache_zero_page(current_addr); + } else if (!ram_bulk_stage && migrate_use_xbzrle()) { + pages = save_xbzrle_page(f, &p, current_addr, block, + offset, last_stage, bytes_transferred); + if (!last_stage) { + /* Can't send this cached data async, since the cache page + * might get updated before it gets to the wire + */ + send_async = false; + } + } + } + + /* XBZRLE overflow or normal page */ + if (pages == -1) { + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_PAGE); + if (send_async) { + qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); + } else { + qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + } + *bytes_transferred += TARGET_PAGE_SIZE; + pages = 1; + acct_info.norm_pages++; + } + + XBZRLE_cache_unlock(); + + return pages; +} + +static int do_compress_ram_page(CompressParam *param) +{ + int bytes_sent, blen; + uint8_t *p; + RAMBlock *block = param->block; + ram_addr_t offset = param->offset; + + p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK); + + bytes_sent = save_page_header(param->file, block, offset | + RAM_SAVE_FLAG_COMPRESS_PAGE); + blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE, + migrate_compress_level()); + bytes_sent += blen; + + return bytes_sent; +} + +static inline void start_compression(CompressParam *param) +{ + param->done = false; + qemu_mutex_lock(¶m->mutex); + param->start = true; + qemu_cond_signal(¶m->cond); + qemu_mutex_unlock(¶m->mutex); +} + +static inline void start_decompression(DecompressParam *param) +{ + qemu_mutex_lock(¶m->mutex); + param->start = true; + qemu_cond_signal(¶m->cond); + qemu_mutex_unlock(¶m->mutex); +} + +static uint64_t bytes_transferred; + +static void flush_compressed_data(QEMUFile *f) +{ + int idx, len, thread_count; + + if (!migrate_use_compression()) { + return; + } + thread_count = migrate_compress_threads(); + for (idx = 0; idx < thread_count; idx++) { + if (!comp_param[idx].done) { + qemu_mutex_lock(comp_done_lock); + while (!comp_param[idx].done && !quit_comp_thread) { + qemu_cond_wait(comp_done_cond, comp_done_lock); + } + qemu_mutex_unlock(comp_done_lock); + } + if (!quit_comp_thread) { + len = qemu_put_qemu_file(f, comp_param[idx].file); + bytes_transferred += len; + } + } +} + +static inline void set_compress_params(CompressParam *param, RAMBlock *block, + ram_addr_t offset) +{ + param->block = block; + param->offset = offset; +} + +static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, + ram_addr_t offset, + uint64_t *bytes_transferred) +{ + int idx, thread_count, bytes_xmit = -1, pages = -1; + + thread_count = migrate_compress_threads(); + qemu_mutex_lock(comp_done_lock); + while (true) { + for (idx = 0; idx < thread_count; idx++) { + if (comp_param[idx].done) { + bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file); + set_compress_params(&comp_param[idx], block, offset); + start_compression(&comp_param[idx]); + pages = 1; + acct_info.norm_pages++; + *bytes_transferred += bytes_xmit; + break; + } + } + if (pages > 0) { + break; + } else { + qemu_cond_wait(comp_done_cond, comp_done_lock); + } + } + qemu_mutex_unlock(comp_done_lock); + + return pages; +} + +/** + * ram_save_compressed_page: compress the given page and send it to the stream + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, + ram_addr_t offset, bool last_stage, + uint64_t *bytes_transferred) +{ + int pages = -1; + uint64_t bytes_xmit; + MemoryRegion *mr = block->mr; + uint8_t *p; + int ret; + + p = memory_region_get_ram_ptr(mr) + offset; + + bytes_xmit = 0; + ret = ram_control_save_page(f, block->offset, + offset, TARGET_PAGE_SIZE, &bytes_xmit); + if (bytes_xmit) { + *bytes_transferred += bytes_xmit; + pages = 1; + } + if (block == last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { + if (ret != RAM_SAVE_CONTROL_DELAYED) { + if (bytes_xmit > 0) { + acct_info.norm_pages++; + } else if (bytes_xmit == 0) { + acct_info.dup_pages++; + } + } + } else { + /* When starting the process of a new block, the first page of + * the block should be sent out before other pages in the same + * block, and all the pages in last block should have been sent + * out, keeping this order is important, because the 'cont' flag + * is used to avoid resending the block name. + */ + if (block != last_sent_block) { + flush_compressed_data(f); + pages = save_zero_page(f, block, offset, p, bytes_transferred); + if (pages == -1) { + set_compress_params(&comp_param[0], block, offset); + /* Use the qemu thread to compress the data to make sure the + * first page is sent out before other pages + */ + bytes_xmit = do_compress_ram_page(&comp_param[0]); + acct_info.norm_pages++; + qemu_put_qemu_file(f, comp_param[0].file); + *bytes_transferred += bytes_xmit; + pages = 1; + } + } else { + pages = save_zero_page(f, block, offset, p, bytes_transferred); + if (pages == -1) { + pages = compress_page_with_multi_thread(f, block, offset, + bytes_transferred); + } + } + } + + return pages; +} + +/** + * ram_find_and_save_block: Finds a dirty page and sends it to f + * + * Called within an RCU critical section. + * + * Returns: The number of pages written + * 0 means no dirty pages + * + * @f: QEMUFile where to send the data + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ + +static int ram_find_and_save_block(QEMUFile *f, bool last_stage, + uint64_t *bytes_transferred) +{ + RAMBlock *block = last_seen_block; + ram_addr_t offset = last_offset; + bool complete_round = false; + int pages = 0; + MemoryRegion *mr; + + if (!block) + block = QLIST_FIRST_RCU(&ram_list.blocks); + + while (true) { + mr = block->mr; + offset = migration_bitmap_find_and_reset_dirty(mr, offset); + if (complete_round && block == last_seen_block && + offset >= last_offset) { + break; + } + if (offset >= block->used_length) { + offset = 0; + block = QLIST_NEXT_RCU(block, next); + if (!block) { + block = QLIST_FIRST_RCU(&ram_list.blocks); + complete_round = true; + ram_bulk_stage = false; + if (migrate_use_xbzrle()) { + /* If xbzrle is on, stop using the data compression at this + * point. In theory, xbzrle can do better than compression. + */ + flush_compressed_data(f); + compression_switch = false; + } + } + } else { + if (compression_switch && migrate_use_compression()) { + pages = ram_save_compressed_page(f, block, offset, last_stage, + bytes_transferred); + } else { + pages = ram_save_page(f, block, offset, last_stage, + bytes_transferred); + } + + /* if page is unmodified, continue to the next */ + if (pages > 0) { + last_sent_block = block; + break; + } + } + } + + last_seen_block = block; + last_offset = offset; + + return pages; +} + +void acct_update_position(QEMUFile *f, size_t size, bool zero) +{ + uint64_t pages = size / TARGET_PAGE_SIZE; + if (zero) { + acct_info.dup_pages += pages; + } else { + acct_info.norm_pages += pages; + bytes_transferred += size; + qemu_update_position(f, size); + } +} + +static ram_addr_t ram_save_remaining(void) +{ + return migration_dirty_pages; +} + +uint64_t ram_bytes_remaining(void) +{ + return ram_save_remaining() * TARGET_PAGE_SIZE; +} + +uint64_t ram_bytes_transferred(void) +{ + return bytes_transferred; +} + +uint64_t ram_bytes_total(void) +{ + RAMBlock *block; + uint64_t total = 0; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) + total += block->used_length; + rcu_read_unlock(); + return total; +} + +void free_xbzrle_decoded_buf(void) +{ + g_free(xbzrle_decoded_buf); + xbzrle_decoded_buf = NULL; +} + +static void migration_end(void) +{ + if (migration_bitmap) { + memory_global_dirty_log_stop(); + g_free(migration_bitmap); + migration_bitmap = NULL; + } + + XBZRLE_cache_lock(); + if (XBZRLE.cache) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + XBZRLE.cache = NULL; + XBZRLE.encoded_buf = NULL; + XBZRLE.current_buf = NULL; + } + XBZRLE_cache_unlock(); +} + +static void ram_migration_cancel(void *opaque) +{ + migration_end(); +} + +static void reset_ram_globals(void) +{ + last_seen_block = NULL; + last_sent_block = NULL; + last_offset = 0; + last_version = ram_list.version; + ram_bulk_stage = true; +} + +#define MAX_WAIT 50 /* ms, half buffered_file limit */ + + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code + * start to become numerous it will be necessary to reduce the + * granularity of these critical sections. + */ + +static int ram_save_setup(QEMUFile *f, void *opaque) +{ + RAMBlock *block; + int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ + + mig_throttle_on = false; + dirty_rate_high_cnt = 0; + bitmap_sync_count = 0; + migration_bitmap_sync_init(); + + if (migrate_use_xbzrle()) { + XBZRLE_cache_lock(); + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + XBZRLE_cache_unlock(); + error_report("Error creating cache"); + return -1; + } + XBZRLE_cache_unlock(); + + /* We prefer not to abort if there is no memory */ + XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); + if (!XBZRLE.encoded_buf) { + error_report("Error allocating encoded_buf"); + return -1; + } + + XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); + if (!XBZRLE.current_buf) { + error_report("Error allocating current_buf"); + g_free(XBZRLE.encoded_buf); + XBZRLE.encoded_buf = NULL; + return -1; + } + + acct_clear(); + } + + /* iothread lock needed for ram_list.dirty_memory[] */ + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + rcu_read_lock(); + bytes_transferred = 0; + reset_ram_globals(); + + ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; + migration_bitmap = bitmap_new(ram_bitmap_pages); + bitmap_set(migration_bitmap, 0, ram_bitmap_pages); + + /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. + */ + migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; + + memory_global_dirty_log_start(); + migration_bitmap_sync(); + qemu_mutex_unlock_ramlist(); + qemu_mutex_unlock_iothread(); + + qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); + qemu_put_be64(f, block->used_length); + } + + rcu_read_unlock(); + + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + + return 0; +} + +static int ram_save_iterate(QEMUFile *f, void *opaque) +{ + int ret; + int i; + int64_t t0; + int pages_sent = 0; + + rcu_read_lock(); + if (ram_list.version != last_version) { + reset_ram_globals(); + } + + /* Read version before ram_list.blocks */ + smp_rmb(); + + ram_control_before_iterate(f, RAM_CONTROL_ROUND); + + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + i = 0; + while ((ret = qemu_file_rate_limit(f)) == 0) { + int pages; + + pages = ram_find_and_save_block(f, false, &bytes_transferred); + /* no more pages to sent */ + if (pages == 0) { + break; + } + pages_sent += pages; + acct_info.iterations++; + check_guest_throttling(); + /* we want to check in the 1st loop, just in case it was the 1st time + and we had to sync the dirty bitmap. + qemu_get_clock_ns() is a bit expensive, so we only check each some + iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; + if (t1 > MAX_WAIT) { + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", + t1, i); + break; + } + } + i++; + } + flush_compressed_data(f); + rcu_read_unlock(); + + /* + * Must occur before EOS (or any QEMUFile operation) + * because of RDMA protocol. + */ + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + bytes_transferred += 8; + + ret = qemu_file_get_error(f); + if (ret < 0) { + return ret; + } + + return pages_sent; +} + +/* Called with iothread lock */ +static int ram_save_complete(QEMUFile *f, void *opaque) +{ + rcu_read_lock(); + + migration_bitmap_sync(); + + ram_control_before_iterate(f, RAM_CONTROL_FINISH); + + /* try transferring iterative blocks of memory */ + + /* flush all remaining blocks regardless of rate limiting */ + while (true) { + int pages; + + pages = ram_find_and_save_block(f, true, &bytes_transferred); + /* no more blocks to sent */ + if (pages == 0) { + break; + } + } + + flush_compressed_data(f); + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + migration_end(); + + rcu_read_unlock(); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + + return 0; +} + +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ + uint64_t remaining_size; + + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + + if (remaining_size < max_size) { + qemu_mutex_lock_iothread(); + rcu_read_lock(); + migration_bitmap_sync(); + rcu_read_unlock(); + qemu_mutex_unlock_iothread(); + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + } + return remaining_size; +} + +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + unsigned int xh_len; + int xh_flags; + + if (!xbzrle_decoded_buf) { + xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE); + } + + /* extract RLE header */ + xh_flags = qemu_get_byte(f); + xh_len = qemu_get_be16(f); + + if (xh_flags != ENCODING_FLAG_XBZRLE) { + error_report("Failed to load XBZRLE page - wrong compression!"); + return -1; + } + + if (xh_len > TARGET_PAGE_SIZE) { + error_report("Failed to load XBZRLE page - len overflow!"); + return -1; + } + /* load data and decode */ + qemu_get_buffer(f, xbzrle_decoded_buf, xh_len); + + /* decode RLE */ + if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, + TARGET_PAGE_SIZE) == -1) { + error_report("Failed to load XBZRLE page - decode error!"); + return -1; + } + + return 0; +} + +/* Must be called from within a rcu critical section. + * Returns a pointer from within the RCU-protected ram_list. + */ +static inline void *host_from_stream_offset(QEMUFile *f, + ram_addr_t offset, + int flags) +{ + static RAMBlock *block = NULL; + char id[256]; + uint8_t len; + + if (flags & RAM_SAVE_FLAG_CONTINUE) { + if (!block || block->max_length <= offset) { + error_report("Ack, bad migration stream!"); + return NULL; + } + + return memory_region_get_ram_ptr(block->mr) + offset; + } + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id)) && + block->max_length > offset) { + return memory_region_get_ram_ptr(block->mr) + offset; + } + } + + error_report("Can't find block %s!", id); + return NULL; +} + +/* + * If a page (or a whole RDMA chunk) has been + * determined to be zero, then zap it. + */ +void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) +{ + if (ch != 0 || !is_zero_range(host, size)) { + memset(host, ch, size); + } +} + +static void *do_data_decompress(void *opaque) +{ + DecompressParam *param = opaque; + unsigned long pagesize; + + while (!quit_decomp_thread) { + qemu_mutex_lock(¶m->mutex); + while (!param->start && !quit_decomp_thread) { + qemu_cond_wait(¶m->cond, ¶m->mutex); + pagesize = TARGET_PAGE_SIZE; + if (!quit_decomp_thread) { + /* uncompress() will return failed in some case, especially + * when the page is dirted when doing the compression, it's + * not a problem because the dirty page will be retransferred + * and uncompress() won't break the data in other pages. + */ + uncompress((Bytef *)param->des, &pagesize, + (const Bytef *)param->compbuf, param->len); + } + param->start = false; + } + qemu_mutex_unlock(¶m->mutex); + } + + return NULL; +} + +void migrate_decompress_threads_create(void) +{ + int i, thread_count; + + thread_count = migrate_decompress_threads(); + decompress_threads = g_new0(QemuThread, thread_count); + decomp_param = g_new0(DecompressParam, thread_count); + compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); + quit_decomp_thread = false; + for (i = 0; i < thread_count; i++) { + qemu_mutex_init(&decomp_param[i].mutex); + qemu_cond_init(&decomp_param[i].cond); + decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); + qemu_thread_create(decompress_threads + i, "decompress", + do_data_decompress, decomp_param + i, + QEMU_THREAD_JOINABLE); + } +} + +void migrate_decompress_threads_join(void) +{ + int i, thread_count; + + quit_decomp_thread = true; + thread_count = migrate_decompress_threads(); + for (i = 0; i < thread_count; i++) { + qemu_mutex_lock(&decomp_param[i].mutex); + qemu_cond_signal(&decomp_param[i].cond); + qemu_mutex_unlock(&decomp_param[i].mutex); + } + for (i = 0; i < thread_count; i++) { + qemu_thread_join(decompress_threads + i); + qemu_mutex_destroy(&decomp_param[i].mutex); + qemu_cond_destroy(&decomp_param[i].cond); + g_free(decomp_param[i].compbuf); + } + g_free(decompress_threads); + g_free(decomp_param); + g_free(compressed_data_buf); + decompress_threads = NULL; + decomp_param = NULL; + compressed_data_buf = NULL; +} + +static void decompress_data_with_multi_threads(uint8_t *compbuf, + void *host, int len) +{ + int idx, thread_count; + + thread_count = migrate_decompress_threads(); + while (true) { + for (idx = 0; idx < thread_count; idx++) { + if (!decomp_param[idx].start) { + memcpy(decomp_param[idx].compbuf, compbuf, len); + decomp_param[idx].des = host; + decomp_param[idx].len = len; + start_decompression(&decomp_param[idx]); + break; + } + } + if (idx < thread_count) { + break; + } + } +} + +static int ram_load(QEMUFile *f, void *opaque, int version_id) +{ + int flags = 0, ret = 0; + static uint64_t seq_iter; + int len = 0; + + seq_iter++; + + if (version_id != 4) { + ret = -EINVAL; + } + + /* This RCU critical section can be very long running. + * When RCU reclaims in the code start to become numerous, + * it will be necessary to reduce the granularity of this + * critical section. + */ + rcu_read_lock(); + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + ram_addr_t addr, total_ram_bytes; + void *host; + uint8_t ch; + + addr = qemu_get_be64(f); + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { + case RAM_SAVE_FLAG_MEM_SIZE: + /* Synchronize RAM block list */ + total_ram_bytes = addr; + while (!ret && total_ram_bytes) { + RAMBlock *block; + char id[256]; + ram_addr_t length; + + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + length = qemu_get_be64(f); + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id))) { + if (length != block->used_length) { + Error *local_err = NULL; + + ret = qemu_ram_resize(block->offset, length, &local_err); + if (local_err) { + error_report_err(local_err); + } + } + break; + } + } + + if (!block) { + error_report("Unknown ramblock \"%s\", cannot " + "accept migration", id); + ret = -EINVAL; + } + + total_ram_bytes -= length; + } + break; + case RAM_SAVE_FLAG_COMPRESS: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + ch = qemu_get_byte(f); + ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); + break; + case RAM_SAVE_FLAG_PAGE: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + break; + case RAM_SAVE_FLAG_COMPRESS_PAGE: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Invalid RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + + len = qemu_get_be32(f); + if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; + } + qemu_get_buffer(f, compressed_data_buf, len); + decompress_data_with_multi_threads(compressed_data_buf, host, len); + break; + case RAM_SAVE_FLAG_XBZRLE: + host = host_from_stream_offset(f, addr, flags); + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + if (load_xbzrle(f, addr, host) < 0) { + error_report("Failed to decompress XBZRLE page at " + RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + break; + default: + if (flags & RAM_SAVE_FLAG_HOOK) { + ram_control_load_hook(f, flags); + } else { + error_report("Unknown combination of migration flags: %#x", + flags); + ret = -EINVAL; + } + } + if (!ret) { + ret = qemu_file_get_error(f); + } + } + + rcu_read_unlock(); + DPRINTF("Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); + return ret; +} + +static SaveVMHandlers savevm_ram_handlers = { + .save_live_setup = ram_save_setup, + .save_live_iterate = ram_save_iterate, + .save_live_complete = ram_save_complete, + .save_live_pending = ram_save_pending, + .load_state = ram_load, + .cancel = ram_migration_cancel, +}; + +void ram_mig_init(void) +{ + qemu_mutex_init(&XBZRLE.lock); + register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); +} +/* Stub function that's gets run on the vcpu when its brought out of the + VM to run inside qemu via async_run_on_cpu()*/ + +static void mig_sleep_cpu(void *opq) +{ + qemu_mutex_unlock_iothread(); + g_usleep(30*1000); + qemu_mutex_lock_iothread(); +} + +/* To reduce the dirty rate explicitly disallow the VCPUs from spending + much time in the VM. The migration thread will try to catchup. + Workload will experience a performance drop. +*/ +static void mig_throttle_guest_down(void) +{ + CPUState *cpu; + + qemu_mutex_lock_iothread(); + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, mig_sleep_cpu, NULL); + } + qemu_mutex_unlock_iothread(); +} + +static void check_guest_throttling(void) +{ + static int64_t t0; + int64_t t1; + + if (!mig_throttle_on) { + return; + } + + if (!t0) { + t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + return; + } + + t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + /* If it has been more than 40 ms since the last time the guest + * was throttled then do it again. + */ + if (40 < (t1-t0)/1000000) { + mig_throttle_guest_down(); + t0 = t1; + } +} diff --git a/migration/rdma.c b/migration/rdma.c index 77e34441dc..48b3e64b34 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -236,13 +236,13 @@ typedef struct RDMALocalBlock { * corresponding RDMALocalBlock with * the information needed to perform the actual RDMA. */ -typedef struct QEMU_PACKED RDMARemoteBlock { +typedef struct QEMU_PACKED RDMADestBlock { uint64_t remote_host_addr; uint64_t offset; uint64_t length; uint32_t remote_rkey; uint32_t padding; -} RDMARemoteBlock; +} RDMADestBlock; static uint64_t htonll(uint64_t v) { @@ -258,20 +258,20 @@ static uint64_t ntohll(uint64_t v) { return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]); } -static void remote_block_to_network(RDMARemoteBlock *rb) +static void dest_block_to_network(RDMADestBlock *db) { - rb->remote_host_addr = htonll(rb->remote_host_addr); - rb->offset = htonll(rb->offset); - rb->length = htonll(rb->length); - rb->remote_rkey = htonl(rb->remote_rkey); + db->remote_host_addr = htonll(db->remote_host_addr); + db->offset = htonll(db->offset); + db->length = htonll(db->length); + db->remote_rkey = htonl(db->remote_rkey); } -static void network_to_remote_block(RDMARemoteBlock *rb) +static void network_to_dest_block(RDMADestBlock *db) { - rb->remote_host_addr = ntohll(rb->remote_host_addr); - rb->offset = ntohll(rb->offset); - rb->length = ntohll(rb->length); - rb->remote_rkey = ntohl(rb->remote_rkey); + db->remote_host_addr = ntohll(db->remote_host_addr); + db->offset = ntohll(db->offset); + db->length = ntohll(db->length); + db->remote_rkey = ntohl(db->remote_rkey); } /* @@ -350,7 +350,7 @@ typedef struct RDMAContext { * Description of ram blocks used throughout the code. */ RDMALocalBlocks local_ram_blocks; - RDMARemoteBlock *block; + RDMADestBlock *dest_blocks; /* * Migration on *destination* started. @@ -570,10 +570,10 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, * in advanced before the migration starts. This tells us where the RAM blocks * are so that we can register them individually. */ -static void qemu_rdma_init_one_block(void *host_addr, +static int qemu_rdma_init_one_block(const char *block_name, void *host_addr, ram_addr_t block_offset, ram_addr_t length, void *opaque) { - rdma_add_block(opaque, host_addr, block_offset, length); + return rdma_add_block(opaque, host_addr, block_offset, length); } /* @@ -590,7 +590,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) memset(local, 0, sizeof *local); qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); - rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) * + rdma->dest_blocks = (RDMADestBlock *) g_malloc0(sizeof(RDMADestBlock) * rdma->local_ram_blocks.nb_blocks); local->init = true; return 0; @@ -790,6 +790,13 @@ static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs) for (x = 0; x < num_devices; x++) { verbs = ibv_open_device(dev_list[x]); + if (!verbs) { + if (errno == EPERM) { + continue; + } else { + return -EINVAL; + } + } if (ibv_query_port(verbs, 1, &port_attr)) { ibv_close_device(verbs); @@ -2177,8 +2184,8 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) rdma->connected = false; } - g_free(rdma->block); - rdma->block = NULL; + g_free(rdma->dest_blocks); + rdma->dest_blocks = NULL; for (idx = 0; idx < RDMA_WRID_MAX; idx++) { if (rdma->wr_data[idx].control_mr) { @@ -2445,7 +2452,6 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp) if (host_port) { rdma = g_malloc0(sizeof(RDMAContext)); - memset(rdma, 0, sizeof(RDMAContext)); rdma->current_index = -1; rdma->current_chunk = -1; @@ -2967,25 +2973,25 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, * their "local" descriptions with what was sent. */ for (i = 0; i < local->nb_blocks; i++) { - rdma->block[i].remote_host_addr = + rdma->dest_blocks[i].remote_host_addr = (uintptr_t)(local->block[i].local_host_addr); if (rdma->pin_all) { - rdma->block[i].remote_rkey = local->block[i].mr->rkey; + rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey; } - rdma->block[i].offset = local->block[i].offset; - rdma->block[i].length = local->block[i].length; + rdma->dest_blocks[i].offset = local->block[i].offset; + rdma->dest_blocks[i].length = local->block[i].length; - remote_block_to_network(&rdma->block[i]); + dest_block_to_network(&rdma->dest_blocks[i]); } blocks.len = rdma->local_ram_blocks.nb_blocks - * sizeof(RDMARemoteBlock); + * sizeof(RDMADestBlock); ret = qemu_rdma_post_send_control(rdma, - (uint8_t *) rdma->block, &blocks); + (uint8_t *) rdma->dest_blocks, &blocks); if (ret < 0) { error_report("rdma migration: error sending remote info"); @@ -3141,7 +3147,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, if (flags == RAM_CONTROL_SETUP) { RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT }; RDMALocalBlocks *local = &rdma->local_ram_blocks; - int reg_result_idx, i, j, nb_remote_blocks; + int reg_result_idx, i, j, nb_dest_blocks; head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST; trace_qemu_rdma_registration_stop_ram(); @@ -3162,7 +3168,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, return ret; } - nb_remote_blocks = resp.len / sizeof(RDMARemoteBlock); + nb_dest_blocks = resp.len / sizeof(RDMADestBlock); /* * The protocol uses two different sets of rkeys (mutually exclusive): @@ -3176,7 +3182,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, * and then propagates the remote ram block descriptions to his local copy. */ - if (local->nb_blocks != nb_remote_blocks) { + if (local->nb_blocks != nb_dest_blocks) { ERROR(errp, "ram blocks mismatch #1! " "Your QEMU command line parameters are probably " "not identical on both the source and destination."); @@ -3184,26 +3190,26 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, } qemu_rdma_move_header(rdma, reg_result_idx, &resp); - memcpy(rdma->block, + memcpy(rdma->dest_blocks, rdma->wr_data[reg_result_idx].control_curr, resp.len); - for (i = 0; i < nb_remote_blocks; i++) { - network_to_remote_block(&rdma->block[i]); + for (i = 0; i < nb_dest_blocks; i++) { + network_to_dest_block(&rdma->dest_blocks[i]); /* search local ram blocks */ for (j = 0; j < local->nb_blocks; j++) { - if (rdma->block[i].offset != local->block[j].offset) { + if (rdma->dest_blocks[i].offset != local->block[j].offset) { continue; } - if (rdma->block[i].length != local->block[j].length) { + if (rdma->dest_blocks[i].length != local->block[j].length) { ERROR(errp, "ram blocks mismatch #2! " "Your QEMU command line parameters are probably " "not identical on both the source and destination."); return -EINVAL; } local->block[j].remote_host_addr = - rdma->block[i].remote_host_addr; - local->block[j].remote_rkey = rdma->block[i].remote_rkey; + rdma->dest_blocks[i].remote_host_addr; + local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey; break; } diff --git a/savevm.c b/migration/savevm.c index 3b0e222cb3..2091882196 100644 --- a/savevm.c +++ b/migration/savevm.c @@ -2,6 +2,10 @@ * QEMU System Emulator * * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009-2015 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -51,6 +55,8 @@ #define ARP_PTYPE_IP 0x0800 #define ARP_OP_REQUEST_REV 0x3 +static bool skip_section_footers; + static int announce_self_create(uint8_t *buf, uint8_t *mac_addr) { @@ -235,10 +241,15 @@ typedef struct SaveStateEntry { int is_ram; } SaveStateEntry; +typedef struct SaveState { + QTAILQ_HEAD(, SaveStateEntry) handlers; + int global_section_id; +} SaveState; -static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers = - QTAILQ_HEAD_INITIALIZER(savevm_handlers); -static int global_section_id; +static SaveState savevm_state = { + .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), + .global_section_id = 0, +}; static void dump_vmstate_vmsd(FILE *out_file, const VMStateDescription *vmsd, int indent, @@ -263,11 +274,11 @@ static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field, } static void dump_vmstate_vmss(FILE *out_file, - const VMStateSubsection *subsection, + const VMStateDescription **subsection, int indent) { - if (subsection->vmsd != NULL) { - dump_vmstate_vmsd(out_file, subsection->vmsd, indent, true); + if (*subsection != NULL) { + dump_vmstate_vmsd(out_file, *subsection, indent, true); } } @@ -308,12 +319,12 @@ static void dump_vmstate_vmsd(FILE *out_file, fprintf(out_file, "\n%*s]", indent, ""); } if (vmsd->subsections != NULL) { - const VMStateSubsection *subsection = vmsd->subsections; + const VMStateDescription **subsection = vmsd->subsections; bool first; fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, ""); first = true; - while (subsection->vmsd != NULL) { + while (*subsection != NULL) { if (!first) { fprintf(out_file, ",\n"); } @@ -383,7 +394,7 @@ static int calculate_new_instance_id(const char *idstr) SaveStateEntry *se; int instance_id = 0; - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (strcmp(idstr, se->idstr) == 0 && instance_id <= se->instance_id) { instance_id = se->instance_id + 1; @@ -397,7 +408,7 @@ static int calculate_compat_instance_id(const char *idstr) SaveStateEntry *se; int instance_id = 0; - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->compat) { continue; } @@ -425,7 +436,7 @@ int register_savevm_live(DeviceState *dev, se = g_malloc0(sizeof(SaveStateEntry)); se->version_id = version_id; - se->section_id = global_section_id++; + se->section_id = savevm_state.global_section_id++; se->ops = ops; se->opaque = opaque; se->vmsd = NULL; @@ -457,7 +468,7 @@ int register_savevm_live(DeviceState *dev, } assert(!se->compat || se->instance_id == 0); /* add at the end of list */ - QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry); + QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry); return 0; } @@ -491,9 +502,9 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) } pstrcat(id, sizeof(id), idstr); - QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) { + QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_handlers, se, entry); + QTAILQ_REMOVE(&savevm_state.handlers, se, entry); if (se->compat) { g_free(se->compat); } @@ -515,7 +526,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, se = g_malloc0(sizeof(SaveStateEntry)); se->version_id = vmsd->version_id; - se->section_id = global_section_id++; + se->section_id = savevm_state.global_section_id++; se->opaque = opaque; se->vmsd = vmsd; se->alias_id = alias_id; @@ -543,7 +554,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, } assert(!se->compat || se->instance_id == 0); /* add at the end of list */ - QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry); + QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry); return 0; } @@ -552,9 +563,9 @@ void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd, { SaveStateEntry *se, *new_se; - QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) { + QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (se->vmsd == vmsd && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_handlers, se, entry); + QTAILQ_REMOVE(&savevm_state.handlers, se, entry); if (se->compat) { g_free(se->compat); } @@ -602,11 +613,84 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc) vmstate_save_state(f, se->vmsd, se->opaque, vmdesc); } +void savevm_skip_section_footers(void) +{ + skip_section_footers = true; +} + +/* + * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL) + */ +static void save_section_header(QEMUFile *f, SaveStateEntry *se, + uint8_t section_type) +{ + qemu_put_byte(f, section_type); + qemu_put_be32(f, se->section_id); + + if (section_type == QEMU_VM_SECTION_FULL || + section_type == QEMU_VM_SECTION_START) { + /* ID string */ + size_t len = strlen(se->idstr); + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)se->idstr, len); + + qemu_put_be32(f, se->instance_id); + qemu_put_be32(f, se->version_id); + } +} + +/* + * Write a footer onto device sections that catches cases misformatted device + * sections. + */ +static void save_section_footer(QEMUFile *f, SaveStateEntry *se) +{ + if (!skip_section_footers) { + qemu_put_byte(f, QEMU_VM_SECTION_FOOTER); + qemu_put_be32(f, se->section_id); + } +} + +/* + * Read a footer off the wire and check that it matches the expected section + * + * Returns: true if the footer was good + * false if there is a problem (and calls error_report to say why) + */ +static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) +{ + uint8_t read_mark; + uint32_t read_section_id; + + if (skip_section_footers) { + /* No footer to check */ + return true; + } + + read_mark = qemu_get_byte(f); + + if (read_mark != QEMU_VM_SECTION_FOOTER) { + error_report("Missing section footer for %s", se->idstr); + return false; + } + + read_section_id = qemu_get_be32(f); + if (read_section_id != se->section_id) { + error_report("Mismatched section id in footer for %s -" + " read 0x%x expected 0x%x", + se->idstr, read_section_id, se->section_id); + return false; + } + + /* All good */ + return true; +} + bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->vmsd && se->vmsd->unmigratable) { error_setg(errp, "State blocked by non-migratable device '%s'", se->idstr); @@ -616,6 +700,13 @@ bool qemu_savevm_state_blocked(Error **errp) return false; } +void qemu_savevm_state_header(QEMUFile *f) +{ + trace_savevm_state_header(); + qemu_put_be32(f, QEMU_VM_FILE_MAGIC); + qemu_put_be32(f, QEMU_VM_FILE_VERSION); +} + void qemu_savevm_state_begin(QEMUFile *f, const MigrationParams *params) { @@ -623,19 +714,14 @@ void qemu_savevm_state_begin(QEMUFile *f, int ret; trace_savevm_state_begin(); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->set_params) { continue; } se->ops->set_params(params, se->opaque); } - qemu_put_be32(f, QEMU_VM_FILE_MAGIC); - qemu_put_be32(f, QEMU_VM_FILE_VERSION); - - QTAILQ_FOREACH(se, &savevm_handlers, entry) { - int len; - + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_setup) { continue; } @@ -644,19 +730,10 @@ void qemu_savevm_state_begin(QEMUFile *f, continue; } } - /* Section type */ - qemu_put_byte(f, QEMU_VM_SECTION_START); - qemu_put_be32(f, se->section_id); - - /* ID string */ - len = strlen(se->idstr); - qemu_put_byte(f, len); - qemu_put_buffer(f, (uint8_t *)se->idstr, len); - - qemu_put_be32(f, se->instance_id); - qemu_put_be32(f, se->version_id); + save_section_header(f, se, QEMU_VM_SECTION_START); ret = se->ops->save_live_setup(f, se->opaque); + save_section_footer(f, se); if (ret < 0) { qemu_file_set_error(f, ret); break; @@ -676,7 +753,7 @@ int qemu_savevm_state_iterate(QEMUFile *f) int ret = 1; trace_savevm_state_iterate(); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_iterate) { continue; } @@ -689,12 +766,12 @@ int qemu_savevm_state_iterate(QEMUFile *f) return 0; } trace_savevm_section_start(se->idstr, se->section_id); - /* Section type */ - qemu_put_byte(f, QEMU_VM_SECTION_PART); - qemu_put_be32(f, se->section_id); + + save_section_header(f, se, QEMU_VM_SECTION_PART); ret = se->ops->save_live_iterate(f, se->opaque); trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); if (ret < 0) { qemu_file_set_error(f, ret); @@ -727,7 +804,7 @@ void qemu_savevm_state_complete(QEMUFile *f) cpu_synchronize_all_states(); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_complete) { continue; } @@ -737,12 +814,12 @@ void qemu_savevm_state_complete(QEMUFile *f) } } trace_savevm_section_start(se->idstr, se->section_id); - /* Section type */ - qemu_put_byte(f, QEMU_VM_SECTION_END); - qemu_put_be32(f, se->section_id); + + save_section_header(f, se, QEMU_VM_SECTION_END); ret = se->ops->save_live_complete(f, se->opaque); trace_savevm_section_end(se->idstr, se->section_id, ret); + save_section_footer(f, se); if (ret < 0) { qemu_file_set_error(f, ret); return; @@ -752,8 +829,7 @@ void qemu_savevm_state_complete(QEMUFile *f) vmdesc = qjson_new(); json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE); json_start_array(vmdesc, "devices"); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { - int len; + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; @@ -764,22 +840,13 @@ void qemu_savevm_state_complete(QEMUFile *f) json_prop_str(vmdesc, "name", se->idstr); json_prop_int(vmdesc, "instance_id", se->instance_id); - /* Section type */ - qemu_put_byte(f, QEMU_VM_SECTION_FULL); - qemu_put_be32(f, se->section_id); - - /* ID string */ - len = strlen(se->idstr); - qemu_put_byte(f, len); - qemu_put_buffer(f, (uint8_t *)se->idstr, len); - - qemu_put_be32(f, se->instance_id); - qemu_put_be32(f, se->version_id); + save_section_header(f, se, QEMU_VM_SECTION_FULL); vmstate_save(f, se, vmdesc); json_end_object(vmdesc); trace_savevm_section_end(se->idstr, se->section_id, 0); + save_section_footer(f, se); } qemu_put_byte(f, QEMU_VM_EOF); @@ -803,7 +870,7 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) SaveStateEntry *se; uint64_t ret = 0; - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_pending) { continue; } @@ -822,7 +889,7 @@ void qemu_savevm_state_cancel(void) SaveStateEntry *se; trace_savevm_state_cancel(); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->ops && se->ops->cancel) { se->ops->cancel(se->opaque); } @@ -842,6 +909,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) } qemu_mutex_unlock_iothread(); + qemu_savevm_state_header(f); qemu_savevm_state_begin(f, ¶ms); qemu_mutex_lock_iothread(); @@ -872,9 +940,7 @@ static int qemu_save_device_state(QEMUFile *f) cpu_synchronize_all_states(); - QTAILQ_FOREACH(se, &savevm_handlers, entry) { - int len; - + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->is_ram) { continue; } @@ -882,19 +948,11 @@ static int qemu_save_device_state(QEMUFile *f) continue; } - /* Section type */ - qemu_put_byte(f, QEMU_VM_SECTION_FULL); - qemu_put_be32(f, se->section_id); - - /* ID string */ - len = strlen(se->idstr); - qemu_put_byte(f, len); - qemu_put_buffer(f, (uint8_t *)se->idstr, len); - - qemu_put_be32(f, se->instance_id); - qemu_put_be32(f, se->version_id); + save_section_header(f, se, QEMU_VM_SECTION_FULL); vmstate_save(f, se, NULL); + + save_section_footer(f, se); } qemu_put_byte(f, QEMU_VM_EOF); @@ -906,7 +964,7 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id) { SaveStateEntry *se; - QTAILQ_FOREACH(se, &savevm_handlers, entry) { + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!strcmp(se->idstr, idstr) && (instance_id == se->instance_id || instance_id == se->alias_id)) @@ -922,18 +980,26 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id) return NULL; } -typedef struct LoadStateEntry { +struct LoadStateEntry { QLIST_ENTRY(LoadStateEntry) entry; SaveStateEntry *se; int section_id; int version_id; -} LoadStateEntry; +}; -int qemu_loadvm_state(QEMUFile *f) +void loadvm_free_handlers(MigrationIncomingState *mis) { - QLIST_HEAD(, LoadStateEntry) loadvm_handlers = - QLIST_HEAD_INITIALIZER(loadvm_handlers); LoadStateEntry *le, *new_le; + + QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) { + QLIST_REMOVE(le, entry); + g_free(le); + } +} + +int qemu_loadvm_state(QEMUFile *f) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; uint8_t section_type; unsigned int v; @@ -964,8 +1030,8 @@ int qemu_loadvm_state(QEMUFile *f) while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { uint32_t instance_id, version_id, section_id; SaveStateEntry *se; - char idstr[257]; - int len; + LoadStateEntry *le; + char idstr[256]; trace_qemu_loadvm_state_section(section_type); switch (section_type) { @@ -973,9 +1039,11 @@ int qemu_loadvm_state(QEMUFile *f) case QEMU_VM_SECTION_FULL: /* Read section start */ section_id = qemu_get_be32(f); - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)idstr, len); - idstr[len] = 0; + if (!qemu_get_counted_string(f, idstr)) { + error_report("Unable to read ID string for section %u", + section_id); + return -EINVAL; + } instance_id = qemu_get_be32(f); version_id = qemu_get_be32(f); @@ -1004,7 +1072,7 @@ int qemu_loadvm_state(QEMUFile *f) le->se = se; le->section_id = section_id; le->version_id = version_id; - QLIST_INSERT_HEAD(&loadvm_handlers, le, entry); + QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry); ret = vmstate_load(f, le->se, le->version_id); if (ret < 0) { @@ -1012,13 +1080,17 @@ int qemu_loadvm_state(QEMUFile *f) " device '%s'", instance_id, idstr); goto out; } + if (!check_section_footer(f, le->se)) { + ret = -EINVAL; + goto out; + } break; case QEMU_VM_SECTION_PART: case QEMU_VM_SECTION_END: section_id = qemu_get_be32(f); trace_qemu_loadvm_state_section_partend(section_id); - QLIST_FOREACH(le, &loadvm_handlers, entry) { + QLIST_FOREACH(le, &mis->loadvm_handlers, entry) { if (le->section_id == section_id) { break; } @@ -1035,6 +1107,10 @@ int qemu_loadvm_state(QEMUFile *f) section_id, le->se->idstr); goto out; } + if (!check_section_footer(f, le->se)) { + ret = -EINVAL; + goto out; + } break; default: error_report("Unknown savevm section type %d", section_type); @@ -1066,11 +1142,6 @@ int qemu_loadvm_state(QEMUFile *f) ret = 0; out: - QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) { - QLIST_REMOVE(le, entry); - g_free(le); - } - if (ret == 0) { /* We may not have a VMDESC section, so ignore relative errors */ ret = file_error_after_eof; @@ -1314,9 +1385,11 @@ int load_vmstate(const char *name) } qemu_system_reset(VMRESET_SILENT); + migration_incoming_state_new(f); ret = qemu_loadvm_state(f); qemu_fclose(f); + migration_incoming_state_destroy(); if (ret < 0) { error_report("Error %d while loading VM state", ret); return ret; diff --git a/migration/vmstate.c b/migration/vmstate.c index e5388f0596..6138d1acb7 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -341,11 +341,11 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, } static const VMStateDescription * - vmstate_get_subsection(const VMStateSubsection *sub, char *idstr) +vmstate_get_subsection(const VMStateDescription **sub, char *idstr) { - while (sub && sub->needed) { - if (strcmp(idstr, sub->vmsd->name) == 0) { - return sub->vmsd; + while (sub && *sub && (*sub)->needed) { + if (strcmp(idstr, (*sub)->name) == 0) { + return *sub; } sub++; } @@ -358,7 +358,7 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, trace_vmstate_subsection_load(vmsd->name); while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) { - char idstr[256]; + char idstr[256], *idstr_ret; int ret; uint8_t version_id, len, size; const VMStateDescription *sub_vmsd; @@ -369,11 +369,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, trace_vmstate_subsection_load_bad(vmsd->name, "(short)"); return 0; } - size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2); + size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2); if (size != len) { trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)"); return 0; } + memcpy(idstr, idstr_ret, size); idstr[size] = 0; if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) { @@ -405,12 +406,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc) { - const VMStateSubsection *sub = vmsd->subsections; + const VMStateDescription **sub = vmsd->subsections; bool subsection_found = false; - while (sub && sub->needed) { - if (sub->needed(opaque)) { - const VMStateDescription *vmsd = sub->vmsd; + while (sub && *sub && (*sub)->needed) { + if ((*sub)->needed(opaque)) { + const VMStateDescription *vmsd = *sub; uint8_t len; if (vmdesc) { diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py index 0c8b22f2aa..f6894bece9 100755 --- a/scripts/analyze-migration.py +++ b/scripts/analyze-migration.py @@ -474,6 +474,7 @@ class MigrationDump(object): QEMU_VM_SECTION_FULL = 0x04 QEMU_VM_SUBSECTION = 0x05 QEMU_VM_VMDESCRIPTION = 0x06 + QEMU_VM_SECTION_FOOTER= 0x7e def __init__(self, filename): self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ], @@ -526,6 +527,10 @@ class MigrationDump(object): elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END: section_id = file.read32() self.sections[section_id].read() + elif section_type == self.QEMU_VM_SECTION_FOOTER: + read_section_id = file.read32() + if read_section_id != section_id: + raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id)) else: raise Exception("Unknown section type: %d" % section_type) file.close() diff --git a/target-arm/machine.c b/target-arm/machine.c index 9446e5a8ab..36365a57c7 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -40,6 +40,7 @@ static const VMStateDescription vmstate_vfp = { .name = "cpu/vfp", .version_id = 3, .minimum_version_id = 3, + .needed = vfp_needed, .fields = (VMStateField[]) { VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64), /* The xregs array is a little awkward because element 1 (FPSCR) @@ -72,6 +73,7 @@ static const VMStateDescription vmstate_iwmmxt = { .name = "cpu/iwmmxt", .version_id = 1, .minimum_version_id = 1, + .needed = iwmmxt_needed, .fields = (VMStateField[]) { VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16), VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16), @@ -91,6 +93,7 @@ static const VMStateDescription vmstate_m = { .name = "cpu/m", .version_id = 1, .minimum_version_id = 1, + .needed = m_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(env.v7m.other_sp, ARMCPU), VMSTATE_UINT32(env.v7m.vecbase, ARMCPU), @@ -114,6 +117,7 @@ static const VMStateDescription vmstate_thumb2ee = { .name = "cpu/thumb2ee", .version_id = 1, .minimum_version_id = 1, + .needed = thumb2ee_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(env.teecr, ARMCPU), VMSTATE_UINT32(env.teehbr, ARMCPU), @@ -282,21 +286,11 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_BOOL(powered_off, ARMCPU), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_vfp, - .needed = vfp_needed, - } , { - .vmsd = &vmstate_iwmmxt, - .needed = iwmmxt_needed, - } , { - .vmsd = &vmstate_m, - .needed = m_needed, - } , { - .vmsd = &vmstate_thumb2ee, - .needed = thumb2ee_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_vfp, + &vmstate_iwmmxt, + &vmstate_m, + &vmstate_thumb2ee, + NULL } }; diff --git a/target-i386/machine.c b/target-i386/machine.c index 69d86cb476..a0df64b577 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -403,6 +403,7 @@ static const VMStateDescription vmstate_steal_time_msr = { .name = "cpu/steal_time_msr", .version_id = 1, .minimum_version_id = 1, + .needed = steal_time_msr_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.steal_time_msr, X86CPU), VMSTATE_END_OF_LIST() @@ -413,6 +414,7 @@ static const VMStateDescription vmstate_async_pf_msr = { .name = "cpu/async_pf_msr", .version_id = 1, .minimum_version_id = 1, + .needed = async_pf_msr_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.async_pf_en_msr, X86CPU), VMSTATE_END_OF_LIST() @@ -423,6 +425,7 @@ static const VMStateDescription vmstate_pv_eoi_msr = { .name = "cpu/async_pv_eoi_msr", .version_id = 1, .minimum_version_id = 1, + .needed = pv_eoi_msr_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.pv_eoi_en_msr, X86CPU), VMSTATE_END_OF_LIST() @@ -441,6 +444,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = { .name = "cpu/fpop_ip_dp", .version_id = 1, .minimum_version_id = 1, + .needed = fpop_ip_dp_needed, .fields = (VMStateField[]) { VMSTATE_UINT16(env.fpop, X86CPU), VMSTATE_UINT64(env.fpip, X86CPU), @@ -461,6 +465,7 @@ static const VMStateDescription vmstate_msr_tsc_adjust = { .name = "cpu/msr_tsc_adjust", .version_id = 1, .minimum_version_id = 1, + .needed = tsc_adjust_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.tsc_adjust, X86CPU), VMSTATE_END_OF_LIST() @@ -479,6 +484,7 @@ static const VMStateDescription vmstate_msr_tscdeadline = { .name = "cpu/msr_tscdeadline", .version_id = 1, .minimum_version_id = 1, + .needed = tscdeadline_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.tsc_deadline, X86CPU), VMSTATE_END_OF_LIST() @@ -505,6 +511,7 @@ static const VMStateDescription vmstate_msr_ia32_misc_enable = { .name = "cpu/msr_ia32_misc_enable", .version_id = 1, .minimum_version_id = 1, + .needed = misc_enable_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_ia32_misc_enable, X86CPU), VMSTATE_END_OF_LIST() @@ -515,6 +522,7 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = { .name = "cpu/msr_ia32_feature_control", .version_id = 1, .minimum_version_id = 1, + .needed = feature_control_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_ia32_feature_control, X86CPU), VMSTATE_END_OF_LIST() @@ -549,6 +557,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = { .name = "cpu/msr_architectural_pmu", .version_id = 1, .minimum_version_id = 1, + .needed = pmu_enable_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU), VMSTATE_UINT64(env.msr_global_ctrl, X86CPU), @@ -584,6 +593,7 @@ static const VMStateDescription vmstate_mpx = { .name = "cpu/mpx", .version_id = 1, .minimum_version_id = 1, + .needed = mpx_needed, .fields = (VMStateField[]) { VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4), VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU), @@ -605,6 +615,7 @@ static const VMStateDescription vmstate_msr_hypercall_hypercall = { .name = "cpu/msr_hyperv_hypercall", .version_id = 1, .minimum_version_id = 1, + .needed = hyperv_hypercall_enable_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_guest_os_id, X86CPU), VMSTATE_UINT64(env.msr_hv_hypercall, X86CPU), @@ -624,6 +635,7 @@ static const VMStateDescription vmstate_msr_hyperv_vapic = { .name = "cpu/msr_hyperv_vapic", .version_id = 1, .minimum_version_id = 1, + .needed = hyperv_vapic_enable_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_vapic, X86CPU), VMSTATE_END_OF_LIST() @@ -642,6 +654,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = { .name = "cpu/msr_hyperv_time", .version_id = 1, .minimum_version_id = 1, + .needed = hyperv_time_enable_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_tsc, X86CPU), VMSTATE_END_OF_LIST() @@ -683,6 +696,7 @@ static const VMStateDescription vmstate_avx512 = { .name = "cpu/avx512", .version_id = 1, .minimum_version_id = 1, + .needed = avx512_needed, .fields = (VMStateField[]) { VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0), @@ -705,6 +719,7 @@ static const VMStateDescription vmstate_xss = { .name = "cpu/xss", .version_id = 1, .minimum_version_id = 1, + .needed = xss_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.xss, X86CPU), VMSTATE_END_OF_LIST() @@ -813,54 +828,22 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_END_OF_LIST() /* The above list is not sorted /wrt version numbers, watch out! */ }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_async_pf_msr, - .needed = async_pf_msr_needed, - } , { - .vmsd = &vmstate_pv_eoi_msr, - .needed = pv_eoi_msr_needed, - } , { - .vmsd = &vmstate_steal_time_msr, - .needed = steal_time_msr_needed, - } , { - .vmsd = &vmstate_fpop_ip_dp, - .needed = fpop_ip_dp_needed, - }, { - .vmsd = &vmstate_msr_tsc_adjust, - .needed = tsc_adjust_needed, - }, { - .vmsd = &vmstate_msr_tscdeadline, - .needed = tscdeadline_needed, - }, { - .vmsd = &vmstate_msr_ia32_misc_enable, - .needed = misc_enable_needed, - }, { - .vmsd = &vmstate_msr_ia32_feature_control, - .needed = feature_control_needed, - }, { - .vmsd = &vmstate_msr_architectural_pmu, - .needed = pmu_enable_needed, - } , { - .vmsd = &vmstate_mpx, - .needed = mpx_needed, - }, { - .vmsd = &vmstate_msr_hypercall_hypercall, - .needed = hyperv_hypercall_enable_needed, - }, { - .vmsd = &vmstate_msr_hyperv_vapic, - .needed = hyperv_vapic_enable_needed, - }, { - .vmsd = &vmstate_msr_hyperv_time, - .needed = hyperv_time_enable_needed, - }, { - .vmsd = &vmstate_avx512, - .needed = avx512_needed, - }, { - .vmsd = &vmstate_xss, - .needed = xss_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_async_pf_msr, + &vmstate_pv_eoi_msr, + &vmstate_steal_time_msr, + &vmstate_fpop_ip_dp, + &vmstate_msr_tsc_adjust, + &vmstate_msr_tscdeadline, + &vmstate_msr_ia32_misc_enable, + &vmstate_msr_ia32_feature_control, + &vmstate_msr_architectural_pmu, + &vmstate_mpx, + &vmstate_msr_hypercall_hypercall, + &vmstate_msr_hyperv_vapic, + &vmstate_msr_hyperv_time, + &vmstate_avx512, + &vmstate_xss, + NULL } }; diff --git a/target-ppc/machine.c b/target-ppc/machine.c index d875211a2d..f4ac7611dd 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -213,6 +213,7 @@ static const VMStateDescription vmstate_fpu = { .name = "cpu/fpu", .version_id = 1, .minimum_version_id = 1, + .needed = fpu_needed, .fields = (VMStateField[]) { VMSTATE_FLOAT64_ARRAY(env.fpr, PowerPCCPU, 32), VMSTATE_UINTTL(env.fpscr, PowerPCCPU), @@ -231,6 +232,7 @@ static const VMStateDescription vmstate_altivec = { .name = "cpu/altivec", .version_id = 1, .minimum_version_id = 1, + .needed = altivec_needed, .fields = (VMStateField[]) { VMSTATE_AVR_ARRAY(env.avr, PowerPCCPU, 32), VMSTATE_UINT32(env.vscr, PowerPCCPU), @@ -249,6 +251,7 @@ static const VMStateDescription vmstate_vsx = { .name = "cpu/vsx", .version_id = 1, .minimum_version_id = 1, + .needed = vsx_needed, .fields = (VMStateField[]) { VMSTATE_UINT64_ARRAY(env.vsr, PowerPCCPU, 32), VMSTATE_END_OF_LIST() @@ -269,6 +272,7 @@ static const VMStateDescription vmstate_tm = { .version_id = 1, .minimum_version_id = 1, .minimum_version_id_old = 1, + .needed = tm_needed, .fields = (VMStateField []) { VMSTATE_UINTTL_ARRAY(env.tm_gpr, PowerPCCPU, 32), VMSTATE_AVR_ARRAY(env.tm_vsr, PowerPCCPU, 64), @@ -302,6 +306,7 @@ static const VMStateDescription vmstate_sr = { .name = "cpu/sr", .version_id = 1, .minimum_version_id = 1, + .needed = sr_needed, .fields = (VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.sr, PowerPCCPU, 32), VMSTATE_END_OF_LIST() @@ -351,6 +356,7 @@ static const VMStateDescription vmstate_slb = { .name = "cpu/slb", .version_id = 1, .minimum_version_id = 1, + .needed = slb_needed, .fields = (VMStateField[]) { VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU), VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES), @@ -383,6 +389,7 @@ static const VMStateDescription vmstate_tlb6xx = { .name = "cpu/tlb6xx", .version_id = 1, .minimum_version_id = 1, + .needed = tlb6xx_needed, .fields = (VMStateField[]) { VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlb6, PowerPCCPU, @@ -429,6 +436,7 @@ static const VMStateDescription vmstate_pbr403 = { .name = "cpu/pbr403", .version_id = 1, .minimum_version_id = 1, + .needed = pbr403_needed, .fields = (VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.pb, PowerPCCPU, 4), VMSTATE_END_OF_LIST() @@ -439,6 +447,7 @@ static const VMStateDescription vmstate_tlbemb = { .name = "cpu/tlb6xx", .version_id = 1, .minimum_version_id = 1, + .needed = tlbemb_needed, .fields = (VMStateField[]) { VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbe, PowerPCCPU, @@ -448,13 +457,9 @@ static const VMStateDescription vmstate_tlbemb = { /* 403 protection registers */ VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_pbr403, - .needed = pbr403_needed, - } , { - /* empty */ - } + .subsections = (const VMStateDescription*[]) { + &vmstate_pbr403, + NULL } }; @@ -483,6 +488,7 @@ static const VMStateDescription vmstate_tlbmas = { .name = "cpu/tlbmas", .version_id = 1, .minimum_version_id = 1, + .needed = tlbmas_needed, .fields = (VMStateField[]) { VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU), VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbm, PowerPCCPU, @@ -533,38 +539,18 @@ const VMStateDescription vmstate_ppc_cpu = { VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU), VMSTATE_END_OF_LIST() }, - .subsections = (VMStateSubsection []) { - { - .vmsd = &vmstate_fpu, - .needed = fpu_needed, - } , { - .vmsd = &vmstate_altivec, - .needed = altivec_needed, - } , { - .vmsd = &vmstate_vsx, - .needed = vsx_needed, - } , { - .vmsd = &vmstate_sr, - .needed = sr_needed, - } , { + .subsections = (const VMStateDescription*[]) { + &vmstate_fpu, + &vmstate_altivec, + &vmstate_vsx, + &vmstate_sr, #ifdef TARGET_PPC64 - .vmsd = &vmstate_tm, - .needed = tm_needed, - } , { - .vmsd = &vmstate_slb, - .needed = slb_needed, - } , { + &vmstate_tm, + &vmstate_slb, #endif /* TARGET_PPC64 */ - .vmsd = &vmstate_tlb6xx, - .needed = tlb6xx_needed, - } , { - .vmsd = &vmstate_tlbemb, - .needed = tlbemb_needed, - } , { - .vmsd = &vmstate_tlbmas, - .needed = tlbmas_needed, - } , { - /* empty */ - } + &vmstate_tlb6xx, + &vmstate_tlbemb, + &vmstate_tlbmas, + NULL } }; diff --git a/target-s390x/machine.c b/target-s390x/machine.c index 004474959a..b76fb08319 100644 --- a/target-s390x/machine.c +++ b/target-s390x/machine.c @@ -42,10 +42,17 @@ static void cpu_pre_save(void *opaque) } } +static inline bool fpu_needed(void *opaque) +{ + /* This looks odd, but we might want to NOT transfer fprs in the future */ + return true; +} + const VMStateDescription vmstate_fpu = { .name = "cpu/fpu", .version_id = 1, .minimum_version_id = 1, + .needed = fpu_needed, .fields = (VMStateField[]) { VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU), VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU), @@ -68,16 +75,11 @@ const VMStateDescription vmstate_fpu = { } }; -static inline bool fpu_needed(void *opaque) -{ - /* This looks odd, but we might want to NOT transfer fprs in the future */ - return true; -} - const VMStateDescription vmstate_vregs = { .name = "cpu/vregs", .version_id = 1, .minimum_version_id = 1, + .needed = vregs_needed, .fields = (VMStateField[]) { /* vregs[0][0] -> vregs[15][0] and fregs are overlays */ VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU), @@ -159,16 +161,10 @@ const VMStateDescription vmstate_s390_cpu = { VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0, irqstate_saved_size), VMSTATE_END_OF_LIST() - }, - .subsections = (VMStateSubsection[]) { - { - .vmsd = &vmstate_fpu, - .needed = fpu_needed, - } , { - .vmsd = &vmstate_vregs, - .needed = vregs_needed, - } , { - /* empty */ - } + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_fpu, + &vmstate_vregs, + NULL }, }; diff --git a/trace-events b/trace-events index 2662ffa850..1abca7a1e5 100644 --- a/trace-events +++ b/trace-events @@ -1179,13 +1179,14 @@ virtio_gpu_cmd_res_flush(uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint3 virtio_gpu_fence_ctrl(uint64_t fence, uint32_t type) "fence 0x%" PRIx64 ", type 0x%x" virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64 -# savevm.c +# migration/savevm.c qemu_loadvm_state_section(unsigned int section_type) "%d" qemu_loadvm_state_section_partend(uint32_t section_id) "%u" qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" savevm_state_begin(void) "" +savevm_state_header(void) "" savevm_state_iterate(void) "" savevm_state_complete(void) "" savevm_state_cancel(void) "" @@ -1205,7 +1206,7 @@ vmstate_subsection_load_good(const char *parent) "%s" # qemu-file.c qemu_file_fclose(void) "" -# arch_init.c +# migration/ram.c migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" migration_throttle(void) "" |