aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-06-12 11:06:03 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-06-12 11:06:03 +0100
commita4ef02fd9b3d12b105b56942166c8364ade9be0f (patch)
treec4d824efda7fa533e3e6c32fa5e7a16cf72afc54
parentd8e3b729cf452d2689c8669f1ec18158db29fd5a (diff)
parent4fa3dd17dc29c316726f0d4a354a4d895e130c73 (diff)
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20150612' into staging
migration/next for 20150612 # gpg: Signature made Fri Jun 12 05:56:21 2015 BST using RSA key ID 5872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" # gpg: aka "Juan Quintela <quintela@trasno.org>" * remotes/juanquintela/tags/migration/20150612: (21 commits) Remove unneeded memset Rename RDMA structures to make destination clear Teach analyze-migration.py about section footers Add a protective section footer Disable section footers on older machine types Merge section header writing Move loadvm_handlers into MigrationIncomingState Move copy out of qemu_peek_buffer Create MigrationIncomingState qemu_ram_foreach_block: pass up error value, and down the ramblock name Split header writing out of qemu_savevm_state_begin Add qemu_get_counted_string to read a string prefixed by a count byte migration: Use normal VMStateDescriptions for Subsections migration: create savevm_state migration: Remove duplicated assignment of SETUP status rdma: Fix qemu crash when IPv6 address is used for migration arch_init: Clean up the duplicate variable 'len' defining in ram_load() migration: reduce include files migration: Add myself to the copyright list of both files migration: move savevm.c inside migration/ ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS2
-rw-r--r--Makefile.target3
-rw-r--r--arch_init.c1611
-rw-r--r--cpus.c11
-rw-r--r--docs/migration.txt11
-rw-r--r--exec.c21
-rw-r--r--hw/acpi/ich9.c10
-rw-r--r--hw/acpi/piix4.c10
-rw-r--r--hw/block/fdc.c42
-rw-r--r--hw/char/serial.c41
-rw-r--r--hw/display/qxl.c11
-rw-r--r--hw/display/vga.c11
-rw-r--r--hw/i386/pc_piix.c2
-rw-r--r--hw/i386/pc_q35.c2
-rw-r--r--hw/ide/core.c32
-rw-r--r--hw/ide/pci.c16
-rw-r--r--hw/input/pckbd.c22
-rw-r--r--hw/input/ps2.c11
-rw-r--r--hw/intc/apic_common.c10
-rw-r--r--hw/isa/lpc_ich9.c10
-rw-r--r--hw/net/e1000.c11
-rw-r--r--hw/net/rtl8139.c11
-rw-r--r--hw/net/vmxnet3.c12
-rw-r--r--hw/pci-host/piix.c10
-rw-r--r--hw/scsi/scsi-bus.c11
-rw-r--r--hw/timer/hpet.c11
-rw-r--r--hw/timer/mc146818rtc.c23
-rw-r--r--hw/usb/hcd-ohci.c11
-rw-r--r--hw/usb/redirect.c34
-rw-r--r--hw/virtio/virtio.c16
-rw-r--r--include/exec/cpu-common.h4
-rw-r--r--include/migration/migration.h17
-rw-r--r--include/migration/qemu-file.h5
-rw-r--r--include/migration/vmstate.h10
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/sysemu/arch_init.h1
-rw-r--r--include/sysemu/sysemu.h1
-rw-r--r--migration/migration.c34
-rw-r--r--migration/qemu-file.c29
-rw-r--r--migration/ram.c1628
-rw-r--r--migration/rdma.c78
-rw-r--r--migration/savevm.c (renamed from savevm.c)257
-rw-r--r--migration/vmstate.c21
-rwxr-xr-xscripts/analyze-migration.py5
-rw-r--r--target-arm/machine.c26
-rw-r--r--target-i386/machine.c81
-rw-r--r--target-ppc/machine.c62
-rw-r--r--target-s390x/machine.c30
-rw-r--r--trace-events5
49 files changed, 2191 insertions, 2144 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ed82154ce..e728d3a1d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1014,8 +1014,6 @@ M: Amit Shah <amit.shah@redhat.com>
S: Maintained
F: include/migration/
F: migration/
-F: savevm.c
-F: arch_init.c
F: scripts/vmstate-static-checker.py
F: tests/vmstate-static-checker-data/
diff --git a/Makefile.target b/Makefile.target
index ec5b92cb60..3e7aafd72d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -132,9 +132,10 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o bootdevice.o
obj-y += hw/
obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o savevm.o cputlb.o
+obj-y += memory.o cputlb.o
obj-y += memory_mapping.o
obj-y += dump.o
+obj-y += migration/ram.o migration/savevm.o
LIBS := $(libs_softmmu) $(LIBS)
# xen support
diff --git a/arch_init.c b/arch_init.c
index d29447497b..725c638ece 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -22,46 +22,15 @@
* THE SOFTWARE.
*/
#include <stdint.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <zlib.h>
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-#include "config.h"
-#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
-#include "audio/audio.h"
-#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
-#include "sysemu/kvm.h"
-#include "migration/migration.h"
#include "hw/i386/smbios.h"
-#include "exec/address-spaces.h"
-#include "hw/audio/pcspk.h"
-#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qmp-commands.h"
-#include "trace.h"
-#include "exec/cpu-all.h"
-#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
-#include "qemu/host-utils.h"
-#include "qemu/rcu_queue.h"
-
-#ifdef DEBUG_ARCH_INIT
-#define DPRINTF(fmt, ...) \
- do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
#ifdef TARGET_SPARC
int graphic_width = 1024;
@@ -111,24 +80,6 @@ int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
-static bool mig_throttle_on;
-static int dirty_rate_high_cnt;
-static void check_guest_throttling(void);
-
-static uint64_t bitmap_sync_count;
-
-/***********************************************************/
-/* ram save/restore */
-
-#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_COMPRESS 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE 0x08
-#define RAM_SAVE_FLAG_EOS 0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE 0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
-#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
static struct defconfig_file {
const char *filename;
@@ -139,8 +90,6 @@ static struct defconfig_file {
{ NULL }, /* end of list */
};
-static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
-
int qemu_read_default_config_files(bool userconfig)
{
int ret;
@@ -159,1517 +108,6 @@ int qemu_read_default_config_files(bool userconfig)
return 0;
}
-static inline bool is_zero_range(uint8_t *p, uint64_t size)
-{
- return buffer_find_nonzero_offset(p, size) == size;
-}
-
-/* struct contains XBZRLE cache and a static page
- used by the compression */
-static struct {
- /* buffer used for XBZRLE encoding */
- uint8_t *encoded_buf;
- /* buffer for storing page content */
- uint8_t *current_buf;
- /* Cache for XBZRLE, Protected by lock. */
- PageCache *cache;
- QemuMutex lock;
-} XBZRLE;
-
-/* buffer used for XBZRLE decoding */
-static uint8_t *xbzrle_decoded_buf;
-
-static void XBZRLE_cache_lock(void)
-{
- if (migrate_use_xbzrle())
- qemu_mutex_lock(&XBZRLE.lock);
-}
-
-static void XBZRLE_cache_unlock(void)
-{
- if (migrate_use_xbzrle())
- qemu_mutex_unlock(&XBZRLE.lock);
-}
-
-/*
- * called from qmp_migrate_set_cache_size in main thread, possibly while
- * a migration is in progress.
- * A running migration maybe using the cache and might finish during this
- * call, hence changes to the cache are protected by XBZRLE.lock().
- */
-int64_t xbzrle_cache_resize(int64_t new_size)
-{
- PageCache *new_cache;
- int64_t ret;
-
- if (new_size < TARGET_PAGE_SIZE) {
- return -1;
- }
-
- XBZRLE_cache_lock();
-
- if (XBZRLE.cache != NULL) {
- if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
- goto out_new_size;
- }
- new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
- if (!new_cache) {
- error_report("Error creating cache");
- ret = -1;
- goto out;
- }
-
- cache_fini(XBZRLE.cache);
- XBZRLE.cache = new_cache;
- }
-
-out_new_size:
- ret = pow2floor(new_size);
-out:
- XBZRLE_cache_unlock();
- return ret;
-}
-
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
- uint64_t dup_pages;
- uint64_t skipped_pages;
- uint64_t norm_pages;
- uint64_t iterations;
- uint64_t xbzrle_bytes;
- uint64_t xbzrle_pages;
- uint64_t xbzrle_cache_miss;
- double xbzrle_cache_miss_rate;
- uint64_t xbzrle_overflows;
-} AccountingInfo;
-
-static AccountingInfo acct_info;
-
-static void acct_clear(void)
-{
- memset(&acct_info, 0, sizeof(acct_info));
-}
-
-uint64_t dup_mig_bytes_transferred(void)
-{
- return acct_info.dup_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t dup_mig_pages_transferred(void)
-{
- return acct_info.dup_pages;
-}
-
-uint64_t skipped_mig_bytes_transferred(void)
-{
- return acct_info.skipped_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t skipped_mig_pages_transferred(void)
-{
- return acct_info.skipped_pages;
-}
-
-uint64_t norm_mig_bytes_transferred(void)
-{
- return acct_info.norm_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t norm_mig_pages_transferred(void)
-{
- return acct_info.norm_pages;
-}
-
-uint64_t xbzrle_mig_bytes_transferred(void)
-{
- return acct_info.xbzrle_bytes;
-}
-
-uint64_t xbzrle_mig_pages_transferred(void)
-{
- return acct_info.xbzrle_pages;
-}
-
-uint64_t xbzrle_mig_pages_cache_miss(void)
-{
- return acct_info.xbzrle_cache_miss;
-}
-
-double xbzrle_mig_cache_miss_rate(void)
-{
- return acct_info.xbzrle_cache_miss_rate;
-}
-
-uint64_t xbzrle_mig_pages_overflow(void)
-{
- return acct_info.xbzrle_overflows;
-}
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
-struct CompressParam {
- bool start;
- bool done;
- QEMUFile *file;
- QemuMutex mutex;
- QemuCond cond;
- RAMBlock *block;
- ram_addr_t offset;
-};
-typedef struct CompressParam CompressParam;
-
-struct DecompressParam {
- bool start;
- QemuMutex mutex;
- QemuCond cond;
- void *des;
- uint8 *compbuf;
- int len;
-};
-typedef struct DecompressParam DecompressParam;
-
-static CompressParam *comp_param;
-static QemuThread *compress_threads;
-/* comp_done_cond is used to wake up the migration thread when
- * one of the compression threads has finished the compression.
- * comp_done_lock is used to co-work with comp_done_cond.
- */
-static QemuMutex *comp_done_lock;
-static QemuCond *comp_done_cond;
-/* The empty QEMUFileOps will be used by file in CompressParam */
-static const QEMUFileOps empty_ops = { };
-
-static bool compression_switch;
-static bool quit_comp_thread;
-static bool quit_decomp_thread;
-static DecompressParam *decomp_param;
-static QemuThread *decompress_threads;
-static uint8_t *compressed_data_buf;
-
-static int do_compress_ram_page(CompressParam *param);
-
-static void *do_data_compress(void *opaque)
-{
- CompressParam *param = opaque;
-
- while (!quit_comp_thread) {
- qemu_mutex_lock(&param->mutex);
- /* Re-check the quit_comp_thread in case of
- * terminate_compression_threads is called just before
- * qemu_mutex_lock(&param->mutex) and after
- * while(!quit_comp_thread), re-check it here can make
- * sure the compression thread terminate as expected.
- */
- while (!param->start && !quit_comp_thread) {
- qemu_cond_wait(&param->cond, &param->mutex);
- }
- if (!quit_comp_thread) {
- do_compress_ram_page(param);
- }
- param->start = false;
- qemu_mutex_unlock(&param->mutex);
-
- qemu_mutex_lock(comp_done_lock);
- param->done = true;
- qemu_cond_signal(comp_done_cond);
- qemu_mutex_unlock(comp_done_lock);
- }
-
- return NULL;
-}
-
-static inline void terminate_compression_threads(void)
-{
- int idx, thread_count;
-
- thread_count = migrate_compress_threads();
- quit_comp_thread = true;
- for (idx = 0; idx < thread_count; idx++) {
- qemu_mutex_lock(&comp_param[idx].mutex);
- qemu_cond_signal(&comp_param[idx].cond);
- qemu_mutex_unlock(&comp_param[idx].mutex);
- }
-}
-
-void migrate_compress_threads_join(void)
-{
- int i, thread_count;
-
- if (!migrate_use_compression()) {
- return;
- }
- terminate_compression_threads();
- thread_count = migrate_compress_threads();
- for (i = 0; i < thread_count; i++) {
- qemu_thread_join(compress_threads + i);
- qemu_fclose(comp_param[i].file);
- qemu_mutex_destroy(&comp_param[i].mutex);
- qemu_cond_destroy(&comp_param[i].cond);
- }
- qemu_mutex_destroy(comp_done_lock);
- qemu_cond_destroy(comp_done_cond);
- g_free(compress_threads);
- g_free(comp_param);
- g_free(comp_done_cond);
- g_free(comp_done_lock);
- compress_threads = NULL;
- comp_param = NULL;
- comp_done_cond = NULL;
- comp_done_lock = NULL;
-}
-
-void migrate_compress_threads_create(void)
-{
- int i, thread_count;
-
- if (!migrate_use_compression()) {
- return;
- }
- quit_comp_thread = false;
- compression_switch = true;
- thread_count = migrate_compress_threads();
- compress_threads = g_new0(QemuThread, thread_count);
- comp_param = g_new0(CompressParam, thread_count);
- comp_done_cond = g_new0(QemuCond, 1);
- comp_done_lock = g_new0(QemuMutex, 1);
- qemu_cond_init(comp_done_cond);
- qemu_mutex_init(comp_done_lock);
- for (i = 0; i < thread_count; i++) {
- /* com_param[i].file is just used as a dummy buffer to save data, set
- * it's ops to empty.
- */
- comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
- comp_param[i].done = true;
- qemu_mutex_init(&comp_param[i].mutex);
- qemu_cond_init(&comp_param[i].cond);
- qemu_thread_create(compress_threads + i, "compress",
- do_data_compress, comp_param + i,
- QEMU_THREAD_JOINABLE);
- }
-}
-
-/**
- * save_page_header: Write page header to wire
- *
- * If this is the 1st block, it also writes the block identification
- *
- * Returns: Number of bytes written
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * in the lower bits, it contains flags
- */
-static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
-{
- size_t size;
-
- qemu_put_be64(f, offset);
- size = 8;
-
- if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr,
- strlen(block->idstr));
- size += 1 + strlen(block->idstr);
- }
- return size;
-}
-
-/* Update the xbzrle cache to reflect a page that's been sent as all 0.
- * The important thing is that a stale (not-yet-0'd) page be replaced
- * by the new data.
- * As a bonus, if the page wasn't in the cache it gets added so that
- * when a small write is made into the 0'd page it gets XBZRLE sent
- */
-static void xbzrle_cache_zero_page(ram_addr_t current_addr)
-{
- if (ram_bulk_stage || !migrate_use_xbzrle()) {
- return;
- }
-
- /* We don't care if this fails to allocate a new cache page
- * as long as it updated an old one */
- cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
- bitmap_sync_count);
-}
-
-#define ENCODING_FLAG_XBZRLE 0x1
-
-/**
- * save_xbzrle_page: compress and send current page
- *
- * Returns: 1 means that we wrote the page
- * 0 means that page is identical to the one already sent
- * -1 means that xbzrle would be longer than normal
- *
- * @f: QEMUFile where to send the data
- * @current_data:
- * @current_addr:
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
- ram_addr_t current_addr, RAMBlock *block,
- ram_addr_t offset, bool last_stage,
- uint64_t *bytes_transferred)
-{
- int encoded_len = 0, bytes_xbzrle;
- uint8_t *prev_cached_page;
-
- if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
- acct_info.xbzrle_cache_miss++;
- if (!last_stage) {
- if (cache_insert(XBZRLE.cache, current_addr, *current_data,
- bitmap_sync_count) == -1) {
- return -1;
- } else {
- /* update *current_data when the page has been
- inserted into cache */
- *current_data = get_cached_data(XBZRLE.cache, current_addr);
- }
- }
- return -1;
- }
-
- prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
-
- /* save current buffer into memory */
- memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
-
- /* XBZRLE encoding (if there is no overflow) */
- encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
- TARGET_PAGE_SIZE);
- if (encoded_len == 0) {
- DPRINTF("Skipping unmodified page\n");
- return 0;
- } else if (encoded_len == -1) {
- DPRINTF("Overflow\n");
- acct_info.xbzrle_overflows++;
- /* update data in the cache */
- if (!last_stage) {
- memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
- *current_data = prev_cached_page;
- }
- return -1;
- }
-
- /* we need to update the data in the cache, in order to get the same data */
- if (!last_stage) {
- memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
- }
-
- /* Send XBZRLE based compressed page */
- bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
- qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
- qemu_put_be16(f, encoded_len);
- qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
- bytes_xbzrle += encoded_len + 1 + 2;
- acct_info.xbzrle_pages++;
- acct_info.xbzrle_bytes += bytes_xbzrle;
- *bytes_transferred += bytes_xbzrle;
-
- return 1;
-}
-
-static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
- ram_addr_t start)
-{
- unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
- unsigned long nr = base + (start >> TARGET_PAGE_BITS);
- uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
- unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
-
- unsigned long next;
-
- if (ram_bulk_stage && nr > base) {
- next = nr + 1;
- } else {
- next = find_next_bit(migration_bitmap, size, nr);
- }
-
- if (next < size) {
- clear_bit(next, migration_bitmap);
- migration_dirty_pages--;
- }
- return (next - base) << TARGET_PAGE_BITS;
-}
-
-static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
-{
- migration_dirty_pages +=
- cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
-}
-
-
-/* Fix me: there are too many global variables used in migration process. */
-static int64_t start_time;
-static int64_t bytes_xfer_prev;
-static int64_t num_dirty_pages_period;
-static uint64_t xbzrle_cache_miss_prev;
-static uint64_t iterations_prev;
-
-static void migration_bitmap_sync_init(void)
-{
- start_time = 0;
- bytes_xfer_prev = 0;
- num_dirty_pages_period = 0;
- xbzrle_cache_miss_prev = 0;
- iterations_prev = 0;
-}
-
-/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
-static void migration_bitmap_sync(void)
-{
- RAMBlock *block;
- uint64_t num_dirty_pages_init = migration_dirty_pages;
- MigrationState *s = migrate_get_current();
- int64_t end_time;
- int64_t bytes_xfer_now;
-
- bitmap_sync_count++;
-
- if (!bytes_xfer_prev) {
- bytes_xfer_prev = ram_bytes_transferred();
- }
-
- if (!start_time) {
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- }
-
- trace_migration_bitmap_sync_start();
- address_space_sync_dirty_bitmap(&address_space_memory);
-
- rcu_read_lock();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
- }
- rcu_read_unlock();
-
- trace_migration_bitmap_sync_end(migration_dirty_pages
- - num_dirty_pages_init);
- num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
- end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
- /* more than 1 second = 1000 millisecons */
- if (end_time > start_time + 1000) {
- if (migrate_auto_converge()) {
- /* The following detection logic can be refined later. For now:
- Check to see if the dirtied bytes is 50% more than the approx.
- amount of bytes that just got transferred since the last time we
- were in this routine. If that happens >N times (for now N==4)
- we turn on the throttle down logic */
- bytes_xfer_now = ram_bytes_transferred();
- if (s->dirty_pages_rate &&
- (num_dirty_pages_period * TARGET_PAGE_SIZE >
- (bytes_xfer_now - bytes_xfer_prev)/2) &&
- (dirty_rate_high_cnt++ > 4)) {
- trace_migration_throttle();
- mig_throttle_on = true;
- dirty_rate_high_cnt = 0;
- }
- bytes_xfer_prev = bytes_xfer_now;
- } else {
- mig_throttle_on = false;
- }
- if (migrate_use_xbzrle()) {
- if (iterations_prev != acct_info.iterations) {
- acct_info.xbzrle_cache_miss_rate =
- (double)(acct_info.xbzrle_cache_miss -
- xbzrle_cache_miss_prev) /
- (acct_info.iterations - iterations_prev);
- }
- iterations_prev = acct_info.iterations;
- xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
- }
- s->dirty_pages_rate = num_dirty_pages_period * 1000
- / (end_time - start_time);
- s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
- start_time = end_time;
- num_dirty_pages_period = 0;
- }
- s->dirty_sync_count = bitmap_sync_count;
-}
-
-/**
- * save_zero_page: Send the zero page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @p: pointer to the page
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
- uint8_t *p, uint64_t *bytes_transferred)
-{
- int pages = -1;
-
- if (is_zero_range(p, TARGET_PAGE_SIZE)) {
- acct_info.dup_pages++;
- *bytes_transferred += save_page_header(f, block,
- offset | RAM_SAVE_FLAG_COMPRESS);
- qemu_put_byte(f, 0);
- *bytes_transferred += 1;
- pages = 1;
- }
-
- return pages;
-}
-
-/**
- * ram_save_page: Send the given page to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
- bool last_stage, uint64_t *bytes_transferred)
-{
- int pages = -1;
- uint64_t bytes_xmit;
- ram_addr_t current_addr;
- MemoryRegion *mr = block->mr;
- uint8_t *p;
- int ret;
- bool send_async = true;
-
- p = memory_region_get_ram_ptr(mr) + offset;
-
- /* In doubt sent page as normal */
- bytes_xmit = 0;
- ret = ram_control_save_page(f, block->offset,
- offset, TARGET_PAGE_SIZE, &bytes_xmit);
- if (bytes_xmit) {
- *bytes_transferred += bytes_xmit;
- pages = 1;
- }
-
- XBZRLE_cache_lock();
-
- current_addr = block->offset + offset;
-
- if (block == last_sent_block) {
- offset |= RAM_SAVE_FLAG_CONTINUE;
- }
- if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
- if (ret != RAM_SAVE_CONTROL_DELAYED) {
- if (bytes_xmit > 0) {
- acct_info.norm_pages++;
- } else if (bytes_xmit == 0) {
- acct_info.dup_pages++;
- }
- }
- } else {
- pages = save_zero_page(f, block, offset, p, bytes_transferred);
- if (pages > 0) {
- /* Must let xbzrle know, otherwise a previous (now 0'd) cached
- * page would be stale
- */
- xbzrle_cache_zero_page(current_addr);
- } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
- pages = save_xbzrle_page(f, &p, current_addr, block,
- offset, last_stage, bytes_transferred);
- if (!last_stage) {
- /* Can't send this cached data async, since the cache page
- * might get updated before it gets to the wire
- */
- send_async = false;
- }
- }
- }
-
- /* XBZRLE overflow or normal page */
- if (pages == -1) {
- *bytes_transferred += save_page_header(f, block,
- offset | RAM_SAVE_FLAG_PAGE);
- if (send_async) {
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
- } else {
- qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
- }
- *bytes_transferred += TARGET_PAGE_SIZE;
- pages = 1;
- acct_info.norm_pages++;
- }
-
- XBZRLE_cache_unlock();
-
- return pages;
-}
-
-static int do_compress_ram_page(CompressParam *param)
-{
- int bytes_sent, blen;
- uint8_t *p;
- RAMBlock *block = param->block;
- ram_addr_t offset = param->offset;
-
- p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
-
- bytes_sent = save_page_header(param->file, block, offset |
- RAM_SAVE_FLAG_COMPRESS_PAGE);
- blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
- migrate_compress_level());
- bytes_sent += blen;
-
- return bytes_sent;
-}
-
-static inline void start_compression(CompressParam *param)
-{
- param->done = false;
- qemu_mutex_lock(&param->mutex);
- param->start = true;
- qemu_cond_signal(&param->cond);
- qemu_mutex_unlock(&param->mutex);
-}
-
-static inline void start_decompression(DecompressParam *param)
-{
- qemu_mutex_lock(&param->mutex);
- param->start = true;
- qemu_cond_signal(&param->cond);
- qemu_mutex_unlock(&param->mutex);
-}
-
-static uint64_t bytes_transferred;
-
-static void flush_compressed_data(QEMUFile *f)
-{
- int idx, len, thread_count;
-
- if (!migrate_use_compression()) {
- return;
- }
- thread_count = migrate_compress_threads();
- for (idx = 0; idx < thread_count; idx++) {
- if (!comp_param[idx].done) {
- qemu_mutex_lock(comp_done_lock);
- while (!comp_param[idx].done && !quit_comp_thread) {
- qemu_cond_wait(comp_done_cond, comp_done_lock);
- }
- qemu_mutex_unlock(comp_done_lock);
- }
- if (!quit_comp_thread) {
- len = qemu_put_qemu_file(f, comp_param[idx].file);
- bytes_transferred += len;
- }
- }
-}
-
-static inline void set_compress_params(CompressParam *param, RAMBlock *block,
- ram_addr_t offset)
-{
- param->block = block;
- param->offset = offset;
-}
-
-static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
- ram_addr_t offset,
- uint64_t *bytes_transferred)
-{
- int idx, thread_count, bytes_xmit = -1, pages = -1;
-
- thread_count = migrate_compress_threads();
- qemu_mutex_lock(comp_done_lock);
- while (true) {
- for (idx = 0; idx < thread_count; idx++) {
- if (comp_param[idx].done) {
- bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
- set_compress_params(&comp_param[idx], block, offset);
- start_compression(&comp_param[idx]);
- pages = 1;
- acct_info.norm_pages++;
- *bytes_transferred += bytes_xmit;
- break;
- }
- }
- if (pages > 0) {
- break;
- } else {
- qemu_cond_wait(comp_done_cond, comp_done_lock);
- }
- }
- qemu_mutex_unlock(comp_done_lock);
-
- return pages;
-}
-
-/**
- * ram_save_compressed_page: compress the given page and send it to the stream
- *
- * Returns: Number of pages written.
- *
- * @f: QEMUFile where to send the data
- * @block: block that contains the page we want to send
- * @offset: offset inside the block for the page
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
- ram_addr_t offset, bool last_stage,
- uint64_t *bytes_transferred)
-{
- int pages = -1;
- uint64_t bytes_xmit;
- MemoryRegion *mr = block->mr;
- uint8_t *p;
- int ret;
-
- p = memory_region_get_ram_ptr(mr) + offset;
-
- bytes_xmit = 0;
- ret = ram_control_save_page(f, block->offset,
- offset, TARGET_PAGE_SIZE, &bytes_xmit);
- if (bytes_xmit) {
- *bytes_transferred += bytes_xmit;
- pages = 1;
- }
- if (block == last_sent_block) {
- offset |= RAM_SAVE_FLAG_CONTINUE;
- }
- if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
- if (ret != RAM_SAVE_CONTROL_DELAYED) {
- if (bytes_xmit > 0) {
- acct_info.norm_pages++;
- } else if (bytes_xmit == 0) {
- acct_info.dup_pages++;
- }
- }
- } else {
- /* When starting the process of a new block, the first page of
- * the block should be sent out before other pages in the same
- * block, and all the pages in last block should have been sent
- * out, keeping this order is important, because the 'cont' flag
- * is used to avoid resending the block name.
- */
- if (block != last_sent_block) {
- flush_compressed_data(f);
- pages = save_zero_page(f, block, offset, p, bytes_transferred);
- if (pages == -1) {
- set_compress_params(&comp_param[0], block, offset);
- /* Use the qemu thread to compress the data to make sure the
- * first page is sent out before other pages
- */
- bytes_xmit = do_compress_ram_page(&comp_param[0]);
- acct_info.norm_pages++;
- qemu_put_qemu_file(f, comp_param[0].file);
- *bytes_transferred += bytes_xmit;
- pages = 1;
- }
- } else {
- pages = save_zero_page(f, block, offset, p, bytes_transferred);
- if (pages == -1) {
- pages = compress_page_with_multi_thread(f, block, offset,
- bytes_transferred);
- }
- }
- }
-
- return pages;
-}
-
-/**
- * ram_find_and_save_block: Finds a dirty page and sends it to f
- *
- * Called within an RCU critical section.
- *
- * Returns: The number of pages written
- * 0 means no dirty pages
- *
- * @f: QEMUFile where to send the data
- * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- */
-
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
- uint64_t *bytes_transferred)
-{
- RAMBlock *block = last_seen_block;
- ram_addr_t offset = last_offset;
- bool complete_round = false;
- int pages = 0;
- MemoryRegion *mr;
-
- if (!block)
- block = QLIST_FIRST_RCU(&ram_list.blocks);
-
- while (true) {
- mr = block->mr;
- offset = migration_bitmap_find_and_reset_dirty(mr, offset);
- if (complete_round && block == last_seen_block &&
- offset >= last_offset) {
- break;
- }
- if (offset >= block->used_length) {
- offset = 0;
- block = QLIST_NEXT_RCU(block, next);
- if (!block) {
- block = QLIST_FIRST_RCU(&ram_list.blocks);
- complete_round = true;
- ram_bulk_stage = false;
- if (migrate_use_xbzrle()) {
- /* If xbzrle is on, stop using the data compression at this
- * point. In theory, xbzrle can do better than compression.
- */
- flush_compressed_data(f);
- compression_switch = false;
- }
- }
- } else {
- if (compression_switch && migrate_use_compression()) {
- pages = ram_save_compressed_page(f, block, offset, last_stage,
- bytes_transferred);
- } else {
- pages = ram_save_page(f, block, offset, last_stage,
- bytes_transferred);
- }
-
- /* if page is unmodified, continue to the next */
- if (pages > 0) {
- last_sent_block = block;
- break;
- }
- }
- }
-
- last_seen_block = block;
- last_offset = offset;
-
- return pages;
-}
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero)
-{
- uint64_t pages = size / TARGET_PAGE_SIZE;
- if (zero) {
- acct_info.dup_pages += pages;
- } else {
- acct_info.norm_pages += pages;
- bytes_transferred += size;
- qemu_update_position(f, size);
- }
-}
-
-static ram_addr_t ram_save_remaining(void)
-{
- return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
- return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
- return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
- RAMBlock *block;
- uint64_t total = 0;
-
- rcu_read_lock();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
- total += block->used_length;
- rcu_read_unlock();
- return total;
-}
-
-void free_xbzrle_decoded_buf(void)
-{
- g_free(xbzrle_decoded_buf);
- xbzrle_decoded_buf = NULL;
-}
-
-static void migration_end(void)
-{
- if (migration_bitmap) {
- memory_global_dirty_log_stop();
- g_free(migration_bitmap);
- migration_bitmap = NULL;
- }
-
- XBZRLE_cache_lock();
- if (XBZRLE.cache) {
- cache_fini(XBZRLE.cache);
- g_free(XBZRLE.encoded_buf);
- g_free(XBZRLE.current_buf);
- XBZRLE.cache = NULL;
- XBZRLE.encoded_buf = NULL;
- XBZRLE.current_buf = NULL;
- }
- XBZRLE_cache_unlock();
-}
-
-static void ram_migration_cancel(void *opaque)
-{
- migration_end();
-}
-
-static void reset_ram_globals(void)
-{
- last_seen_block = NULL;
- last_sent_block = NULL;
- last_offset = 0;
- last_version = ram_list.version;
- ram_bulk_stage = true;
-}
-
-#define MAX_WAIT 50 /* ms, half buffered_file limit */
-
-
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
- * long-running RCU critical section. When rcu-reclaims in the code
- * start to become numerous it will be necessary to reduce the
- * granularity of these critical sections.
- */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
-{
- RAMBlock *block;
- int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
-
- mig_throttle_on = false;
- dirty_rate_high_cnt = 0;
- bitmap_sync_count = 0;
- migration_bitmap_sync_init();
-
- if (migrate_use_xbzrle()) {
- XBZRLE_cache_lock();
- XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
- TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
- if (!XBZRLE.cache) {
- XBZRLE_cache_unlock();
- error_report("Error creating cache");
- return -1;
- }
- XBZRLE_cache_unlock();
-
- /* We prefer not to abort if there is no memory */
- XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
- if (!XBZRLE.encoded_buf) {
- error_report("Error allocating encoded_buf");
- return -1;
- }
-
- XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
- if (!XBZRLE.current_buf) {
- error_report("Error allocating current_buf");
- g_free(XBZRLE.encoded_buf);
- XBZRLE.encoded_buf = NULL;
- return -1;
- }
-
- acct_clear();
- }
-
- /* iothread lock needed for ram_list.dirty_memory[] */
- qemu_mutex_lock_iothread();
- qemu_mutex_lock_ramlist();
- rcu_read_lock();
- bytes_transferred = 0;
- reset_ram_globals();
-
- ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
- migration_bitmap = bitmap_new(ram_bitmap_pages);
- bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
-
- /*
- * Count the total number of pages used by ram blocks not including any
- * gaps due to alignment or unplugs.
- */
- migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
- memory_global_dirty_log_start();
- migration_bitmap_sync();
- qemu_mutex_unlock_ramlist();
- qemu_mutex_unlock_iothread();
-
- qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
-
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
- qemu_put_be64(f, block->used_length);
- }
-
- rcu_read_unlock();
-
- ram_control_before_iterate(f, RAM_CONTROL_SETUP);
- ram_control_after_iterate(f, RAM_CONTROL_SETUP);
-
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
- return 0;
-}
-
-static int ram_save_iterate(QEMUFile *f, void *opaque)
-{
- int ret;
- int i;
- int64_t t0;
- int pages_sent = 0;
-
- rcu_read_lock();
- if (ram_list.version != last_version) {
- reset_ram_globals();
- }
-
- /* Read version before ram_list.blocks */
- smp_rmb();
-
- ram_control_before_iterate(f, RAM_CONTROL_ROUND);
-
- t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- i = 0;
- while ((ret = qemu_file_rate_limit(f)) == 0) {
- int pages;
-
- pages = ram_find_and_save_block(f, false, &bytes_transferred);
- /* no more pages to sent */
- if (pages == 0) {
- break;
- }
- pages_sent += pages;
- acct_info.iterations++;
- check_guest_throttling();
- /* we want to check in the 1st loop, just in case it was the 1st time
- and we had to sync the dirty bitmap.
- qemu_get_clock_ns() is a bit expensive, so we only check each some
- iterations
- */
- if ((i & 63) == 0) {
- uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
- if (t1 > MAX_WAIT) {
- DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
- t1, i);
- break;
- }
- }
- i++;
- }
- flush_compressed_data(f);
- rcu_read_unlock();
-
- /*
- * Must occur before EOS (or any QEMUFile operation)
- * because of RDMA protocol.
- */
- ram_control_after_iterate(f, RAM_CONTROL_ROUND);
-
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
- bytes_transferred += 8;
-
- ret = qemu_file_get_error(f);
- if (ret < 0) {
- return ret;
- }
-
- return pages_sent;
-}
-
-/* Called with iothread lock */
-static int ram_save_complete(QEMUFile *f, void *opaque)
-{
- rcu_read_lock();
-
- migration_bitmap_sync();
-
- ram_control_before_iterate(f, RAM_CONTROL_FINISH);
-
- /* try transferring iterative blocks of memory */
-
- /* flush all remaining blocks regardless of rate limiting */
- while (true) {
- int pages;
-
- pages = ram_find_and_save_block(f, true, &bytes_transferred);
- /* no more blocks to sent */
- if (pages == 0) {
- break;
- }
- }
-
- flush_compressed_data(f);
- ram_control_after_iterate(f, RAM_CONTROL_FINISH);
- migration_end();
-
- rcu_read_unlock();
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
- return 0;
-}
-
-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
- uint64_t remaining_size;
-
- remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-
- if (remaining_size < max_size) {
- qemu_mutex_lock_iothread();
- rcu_read_lock();
- migration_bitmap_sync();
- rcu_read_unlock();
- qemu_mutex_unlock_iothread();
- remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
- }
- return remaining_size;
-}
-
-static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
-{
- unsigned int xh_len;
- int xh_flags;
-
- if (!xbzrle_decoded_buf) {
- xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
- }
-
- /* extract RLE header */
- xh_flags = qemu_get_byte(f);
- xh_len = qemu_get_be16(f);
-
- if (xh_flags != ENCODING_FLAG_XBZRLE) {
- error_report("Failed to load XBZRLE page - wrong compression!");
- return -1;
- }
-
- if (xh_len > TARGET_PAGE_SIZE) {
- error_report("Failed to load XBZRLE page - len overflow!");
- return -1;
- }
- /* load data and decode */
- qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
-
- /* decode RLE */
- if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
- TARGET_PAGE_SIZE) == -1) {
- error_report("Failed to load XBZRLE page - decode error!");
- return -1;
- }
-
- return 0;
-}
-
-/* Must be called from within a rcu critical section.
- * Returns a pointer from within the RCU-protected ram_list.
- */
-static inline void *host_from_stream_offset(QEMUFile *f,
- ram_addr_t offset,
- int flags)
-{
- static RAMBlock *block = NULL;
- char id[256];
- uint8_t len;
-
- if (flags & RAM_SAVE_FLAG_CONTINUE) {
- if (!block || block->max_length <= offset) {
- error_report("Ack, bad migration stream!");
- return NULL;
- }
-
- return memory_region_get_ram_ptr(block->mr) + offset;
- }
-
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)id, len);
- id[len] = 0;
-
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- if (!strncmp(id, block->idstr, sizeof(id)) &&
- block->max_length > offset) {
- return memory_region_get_ram_ptr(block->mr) + offset;
- }
- }
-
- error_report("Can't find block %s!", id);
- return NULL;
-}
-
-/*
- * If a page (or a whole RDMA chunk) has been
- * determined to be zero, then zap it.
- */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
-{
- if (ch != 0 || !is_zero_range(host, size)) {
- memset(host, ch, size);
- }
-}
-
-static void *do_data_decompress(void *opaque)
-{
- DecompressParam *param = opaque;
- unsigned long pagesize;
-
- while (!quit_decomp_thread) {
- qemu_mutex_lock(&param->mutex);
- while (!param->start && !quit_decomp_thread) {
- qemu_cond_wait(&param->cond, &param->mutex);
- pagesize = TARGET_PAGE_SIZE;
- if (!quit_decomp_thread) {
- /* uncompress() will return failed in some case, especially
- * when the page is dirted when doing the compression, it's
- * not a problem because the dirty page will be retransferred
- * and uncompress() won't break the data in other pages.
- */
- uncompress((Bytef *)param->des, &pagesize,
- (const Bytef *)param->compbuf, param->len);
- }
- param->start = false;
- }
- qemu_mutex_unlock(&param->mutex);
- }
-
- return NULL;
-}
-
-void migrate_decompress_threads_create(void)
-{
- int i, thread_count;
-
- thread_count = migrate_decompress_threads();
- decompress_threads = g_new0(QemuThread, thread_count);
- decomp_param = g_new0(DecompressParam, thread_count);
- compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
- quit_decomp_thread = false;
- for (i = 0; i < thread_count; i++) {
- qemu_mutex_init(&decomp_param[i].mutex);
- qemu_cond_init(&decomp_param[i].cond);
- decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
- qemu_thread_create(decompress_threads + i, "decompress",
- do_data_decompress, decomp_param + i,
- QEMU_THREAD_JOINABLE);
- }
-}
-
-void migrate_decompress_threads_join(void)
-{
- int i, thread_count;
-
- quit_decomp_thread = true;
- thread_count = migrate_decompress_threads();
- for (i = 0; i < thread_count; i++) {
- qemu_mutex_lock(&decomp_param[i].mutex);
- qemu_cond_signal(&decomp_param[i].cond);
- qemu_mutex_unlock(&decomp_param[i].mutex);
- }
- for (i = 0; i < thread_count; i++) {
- qemu_thread_join(decompress_threads + i);
- qemu_mutex_destroy(&decomp_param[i].mutex);
- qemu_cond_destroy(&decomp_param[i].cond);
- g_free(decomp_param[i].compbuf);
- }
- g_free(decompress_threads);
- g_free(decomp_param);
- g_free(compressed_data_buf);
- decompress_threads = NULL;
- decomp_param = NULL;
- compressed_data_buf = NULL;
-}
-
-static void decompress_data_with_multi_threads(uint8_t *compbuf,
- void *host, int len)
-{
- int idx, thread_count;
-
- thread_count = migrate_decompress_threads();
- while (true) {
- for (idx = 0; idx < thread_count; idx++) {
- if (!decomp_param[idx].start) {
- memcpy(decomp_param[idx].compbuf, compbuf, len);
- decomp_param[idx].des = host;
- decomp_param[idx].len = len;
- start_decompression(&decomp_param[idx]);
- break;
- }
- }
- if (idx < thread_count) {
- break;
- }
- }
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
- int flags = 0, ret = 0;
- static uint64_t seq_iter;
- int len = 0;
-
- seq_iter++;
-
- if (version_id != 4) {
- ret = -EINVAL;
- }
-
- /* This RCU critical section can be very long running.
- * When RCU reclaims in the code start to become numerous,
- * it will be necessary to reduce the granularity of this
- * critical section.
- */
- rcu_read_lock();
- while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
- ram_addr_t addr, total_ram_bytes;
- void *host;
- uint8_t ch;
-
- addr = qemu_get_be64(f);
- flags = addr & ~TARGET_PAGE_MASK;
- addr &= TARGET_PAGE_MASK;
-
- switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
- case RAM_SAVE_FLAG_MEM_SIZE:
- /* Synchronize RAM block list */
- total_ram_bytes = addr;
- while (!ret && total_ram_bytes) {
- RAMBlock *block;
- uint8_t len;
- char id[256];
- ram_addr_t length;
-
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)id, len);
- id[len] = 0;
- length = qemu_get_be64(f);
-
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- if (!strncmp(id, block->idstr, sizeof(id))) {
- if (length != block->used_length) {
- Error *local_err = NULL;
-
- ret = qemu_ram_resize(block->offset, length, &local_err);
- if (local_err) {
- error_report_err(local_err);
- }
- }
- break;
- }
- }
-
- if (!block) {
- error_report("Unknown ramblock \"%s\", cannot "
- "accept migration", id);
- ret = -EINVAL;
- }
-
- total_ram_bytes -= length;
- }
- break;
- case RAM_SAVE_FLAG_COMPRESS:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
- ch = qemu_get_byte(f);
- ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
- break;
- case RAM_SAVE_FLAG_PAGE:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
- qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
- break;
- case RAM_SAVE_FLAG_COMPRESS_PAGE:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
-
- len = qemu_get_be32(f);
- if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
- error_report("Invalid compressed data length: %d", len);
- ret = -EINVAL;
- break;
- }
- qemu_get_buffer(f, compressed_data_buf, len);
- decompress_data_with_multi_threads(compressed_data_buf, host, len);
- break;
- case RAM_SAVE_FLAG_XBZRLE:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
- if (load_xbzrle(f, addr, host) < 0) {
- error_report("Failed to decompress XBZRLE page at "
- RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
- break;
- case RAM_SAVE_FLAG_EOS:
- /* normal exit */
- break;
- default:
- if (flags & RAM_SAVE_FLAG_HOOK) {
- ram_control_load_hook(f, flags);
- } else {
- error_report("Unknown combination of migration flags: %#x",
- flags);
- ret = -EINVAL;
- }
- }
- if (!ret) {
- ret = qemu_file_get_error(f);
- }
- }
-
- rcu_read_unlock();
- DPRINTF("Completed load of VM with exit code %d seq iteration "
- "%" PRIu64 "\n", ret, seq_iter);
- return ret;
-}
-
-static SaveVMHandlers savevm_ram_handlers = {
- .save_live_setup = ram_save_setup,
- .save_live_iterate = ram_save_iterate,
- .save_live_complete = ram_save_complete,
- .save_live_pending = ram_save_pending,
- .load_state = ram_load,
- .cancel = ram_migration_cancel,
-};
-
-void ram_mig_init(void)
-{
- qemu_mutex_init(&XBZRLE.lock);
- register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
-}
-
struct soundhw {
const char *name;
const char *descr;
@@ -1869,52 +307,3 @@ TargetInfo *qmp_query_target(Error **errp)
return info;
}
-
-/* Stub function that's gets run on the vcpu when its brought out of the
- VM to run inside qemu via async_run_on_cpu()*/
-static void mig_sleep_cpu(void *opq)
-{
- qemu_mutex_unlock_iothread();
- g_usleep(30*1000);
- qemu_mutex_lock_iothread();
-}
-
-/* To reduce the dirty rate explicitly disallow the VCPUs from spending
- much time in the VM. The migration thread will try to catchup.
- Workload will experience a performance drop.
-*/
-static void mig_throttle_guest_down(void)
-{
- CPUState *cpu;
-
- qemu_mutex_lock_iothread();
- CPU_FOREACH(cpu) {
- async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
- }
- qemu_mutex_unlock_iothread();
-}
-
-static void check_guest_throttling(void)
-{
- static int64_t t0;
- int64_t t1;
-
- if (!mig_throttle_on) {
- return;
- }
-
- if (!t0) {
- t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- return;
- }
-
- t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
- /* If it has been more than 40 ms since the last time the guest
- * was throttled then do it again.
- */
- if (40 < (t1-t0)/1000000) {
- mig_throttle_guest_down();
- t0 = t1;
- }
-}
diff --git a/cpus.c b/cpus.c
index f38b858f9b..b85fb5f03f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -480,6 +480,7 @@ static const VMStateDescription icount_vmstate_timers = {
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
@@ -497,13 +498,9 @@ static const VMStateDescription vmstate_timers = {
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &icount_vmstate_timers,
- .needed = icount_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &icount_vmstate_timers,
+ NULL
}
};
diff --git a/docs/migration.txt b/docs/migration.txt
index 0492a4547a..f6df4beb2a 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -257,6 +257,7 @@ const VMStateDescription vmstate_ide_drive_pio_state = {
.minimum_version_id = 1,
.pre_save = ide_drive_pio_pre_save,
.post_load = ide_drive_pio_post_load,
+ .needed = ide_drive_pio_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(req_nb_sectors, IDEState),
VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -279,13 +280,9 @@ const VMStateDescription vmstate_ide_drive = {
.... several fields ....
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_ide_drive_pio_state,
- .needed = ide_drive_pio_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ide_drive_pio_state,
+ NULL
}
};
diff --git a/exec.c b/exec.c
index 487583b1bd..76bfc4ac4a 100644
--- a/exec.c
+++ b/exec.c
@@ -454,6 +454,7 @@ static const VMStateDescription vmstate_cpu_common_exception_index = {
.name = "cpu_common/exception_index",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = cpu_common_exception_index_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(exception_index, CPUState),
VMSTATE_END_OF_LIST()
@@ -471,13 +472,9 @@ const VMStateDescription vmstate_cpu_common = {
VMSTATE_UINT32(interrupt_request, CPUState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_cpu_common_exception_index,
- .needed = cpu_common_exception_index_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_cpu_common_exception_index,
+ NULL
}
};
@@ -3348,14 +3345,20 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
return res;
}
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
RAMBlock *block;
+ int ret = 0;
rcu_read_lock();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- func(block->host, block->offset, block->used_length, opaque);
+ ret = func(block->idstr, block->host, block->offset,
+ block->used_length, opaque);
+ if (ret) {
+ break;
+ }
}
rcu_read_unlock();
+ return ret;
}
#endif
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 25bc023882..8a64ffb38f 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -152,6 +152,7 @@ static const VMStateDescription vmstate_memhp_state = {
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
+ .needed = vmstate_test_use_memhp,
.fields = (VMStateField[]) {
VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs),
VMSTATE_END_OF_LIST()
@@ -175,12 +176,9 @@ const VMStateDescription vmstate_ich9_pm = {
VMSTATE_UINT32(smi_sts, ICH9LPCPMRegs),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_memhp_state,
- .needed = vmstate_test_use_memhp,
- },
- VMSTATE_END_OF_LIST()
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_memhp_state,
+ NULL
}
};
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index b730ca6ced..3bd1d5a865 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -260,6 +260,7 @@ static const VMStateDescription vmstate_memhp_state = {
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
+ .needed = vmstate_test_use_memhp,
.fields = (VMStateField[]) {
VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState),
VMSTATE_END_OF_LIST()
@@ -298,12 +299,9 @@ static const VMStateDescription vmstate_acpi = {
vmstate_test_use_acpi_pci_hotplug),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_memhp_state,
- .needed = vmstate_test_use_memhp,
- },
- VMSTATE_END_OF_LIST()
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_memhp_state,
+ NULL
}
};
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6e794597dc..5e1b67ee43 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -699,6 +699,7 @@ static const VMStateDescription vmstate_fdrive_media_changed = {
.name = "fdrive/media_changed",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdrive_media_changed_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(media_changed, FDrive),
VMSTATE_END_OF_LIST()
@@ -716,6 +717,7 @@ static const VMStateDescription vmstate_fdrive_media_rate = {
.name = "fdrive/media_rate",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdrive_media_rate_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(media_rate, FDrive),
VMSTATE_END_OF_LIST()
@@ -733,6 +735,7 @@ static const VMStateDescription vmstate_fdrive_perpendicular = {
.name = "fdrive/perpendicular",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdrive_perpendicular_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(perpendicular, FDrive),
VMSTATE_END_OF_LIST()
@@ -756,19 +759,11 @@ static const VMStateDescription vmstate_fdrive = {
VMSTATE_UINT8(sect, FDrive),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_fdrive_media_changed,
- .needed = &fdrive_media_changed_needed,
- } , {
- .vmsd = &vmstate_fdrive_media_rate,
- .needed = &fdrive_media_rate_needed,
- } , {
- .vmsd = &vmstate_fdrive_perpendicular,
- .needed = &fdrive_perpendicular_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_fdrive_media_changed,
+ &vmstate_fdrive_media_rate,
+ &vmstate_fdrive_perpendicular,
+ NULL
}
};
@@ -833,6 +828,7 @@ static const VMStateDescription vmstate_fdc_reset_sensei = {
.name = "fdc/reset_sensei",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdc_reset_sensei_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(reset_sensei, FDCtrl),
VMSTATE_END_OF_LIST()
@@ -850,6 +846,7 @@ static const VMStateDescription vmstate_fdc_result_timer = {
.name = "fdc/result_timer",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdc_result_timer_needed,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(result_timer, FDCtrl),
VMSTATE_END_OF_LIST()
@@ -867,6 +864,7 @@ static const VMStateDescription vmstate_fdc_phase = {
.name = "fdc/phase",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fdc_phase_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(phase, FDCtrl),
VMSTATE_END_OF_LIST()
@@ -911,19 +909,11 @@ static const VMStateDescription vmstate_fdc = {
vmstate_fdrive, FDrive),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_fdc_reset_sensei,
- .needed = fdc_reset_sensei_needed,
- } , {
- .vmsd = &vmstate_fdc_result_timer,
- .needed = fdc_result_timer_needed,
- } , {
- .vmsd = &vmstate_fdc_phase,
- .needed = fdc_phase_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_fdc_reset_sensei,
+ &vmstate_fdc_result_timer,
+ &vmstate_fdc_phase,
+ NULL
}
};
diff --git a/hw/char/serial.c b/hw/char/serial.c
index 55011cfd26..513d73c27f 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -662,6 +662,7 @@ static const VMStateDescription vmstate_serial_thr_ipending = {
.name = "serial/thr_ipending",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_thr_ipending_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(thr_ipending, SerialState),
VMSTATE_END_OF_LIST()
@@ -678,6 +679,7 @@ static const VMStateDescription vmstate_serial_tsr = {
.name = "serial/tsr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_tsr_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(tsr_retry, SerialState),
VMSTATE_UINT8(thr, SerialState),
@@ -697,6 +699,7 @@ static const VMStateDescription vmstate_serial_recv_fifo = {
.name = "serial/recv_fifo",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_recv_fifo_needed,
.fields = (VMStateField[]) {
VMSTATE_STRUCT(recv_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
VMSTATE_END_OF_LIST()
@@ -713,6 +716,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = {
.name = "serial/xmit_fifo",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_xmit_fifo_needed,
.fields = (VMStateField[]) {
VMSTATE_STRUCT(xmit_fifo, SerialState, 1, vmstate_fifo8, Fifo8),
VMSTATE_END_OF_LIST()
@@ -729,6 +733,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = {
.name = "serial/fifo_timeout_timer",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_fifo_timeout_timer_needed,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(fifo_timeout_timer, SerialState),
VMSTATE_END_OF_LIST()
@@ -745,6 +750,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = {
.name = "serial/timeout_ipending",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = serial_timeout_ipending_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(timeout_ipending, SerialState),
VMSTATE_END_OF_LIST()
@@ -760,6 +766,7 @@ static bool serial_poll_needed(void *opaque)
static const VMStateDescription vmstate_serial_poll = {
.name = "serial/poll",
.version_id = 1,
+ .needed = serial_poll_needed,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_INT32(poll_msl, SerialState),
@@ -788,31 +795,15 @@ const VMStateDescription vmstate_serial = {
VMSTATE_UINT8_V(fcr_vmstate, SerialState, 3),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_serial_thr_ipending,
- .needed = &serial_thr_ipending_needed,
- } , {
- .vmsd = &vmstate_serial_tsr,
- .needed = &serial_tsr_needed,
- } , {
- .vmsd = &vmstate_serial_recv_fifo,
- .needed = &serial_recv_fifo_needed,
- } , {
- .vmsd = &vmstate_serial_xmit_fifo,
- .needed = &serial_xmit_fifo_needed,
- } , {
- .vmsd = &vmstate_serial_fifo_timeout_timer,
- .needed = &serial_fifo_timeout_timer_needed,
- } , {
- .vmsd = &vmstate_serial_timeout_ipending,
- .needed = &serial_timeout_ipending_needed,
- } , {
- .vmsd = &vmstate_serial_poll,
- .needed = &serial_poll_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_serial_thr_ipending,
+ &vmstate_serial_tsr,
+ &vmstate_serial_recv_fifo,
+ &vmstate_serial_xmit_fifo,
+ &vmstate_serial_fifo_timeout_timer,
+ &vmstate_serial_timeout_ipending,
+ &vmstate_serial_poll,
+ NULL
}
};
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index b220e2d5d2..722146ec3a 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2220,6 +2220,7 @@ static VMStateDescription qxl_vmstate_monitors_config = {
.name = "qxl/monitors-config",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = qxl_monitors_config_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(guest_monitors_config, PCIQXLDevice),
VMSTATE_END_OF_LIST()
@@ -2253,13 +2254,9 @@ static VMStateDescription qxl_vmstate = {
VMSTATE_UINT64(guest_cursor, PCIQXLDevice),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &qxl_vmstate_monitors_config,
- .needed = qxl_monitors_config_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &qxl_vmstate_monitors_config,
+ NULL
}
};
diff --git a/hw/display/vga.c b/hw/display/vga.c
index d1d296c74e..b35d523e65 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2035,6 +2035,7 @@ static const VMStateDescription vmstate_vga_endian = {
.name = "vga.endian",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = vga_endian_state_needed,
.fields = (VMStateField[]) {
VMSTATE_BOOL(big_endian_fb, VGACommonState),
VMSTATE_END_OF_LIST()
@@ -2078,13 +2079,9 @@ const VMStateDescription vmstate_vga_common = {
VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_vga_endian,
- .needed = vga_endian_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_vga_endian,
+ NULL
}
};
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5253e6d4fa..e142f75649 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -52,6 +52,7 @@
#ifdef CONFIG_XEN
# include <xen/hvm/hvm_info_table.h>
#endif
+#include "migration/migration.h"
#define MAX_IDE_BUS 2
@@ -305,6 +306,7 @@ static void pc_init1(MachineState *machine)
static void pc_compat_2_3(MachineState *machine)
{
+ savevm_skip_section_footers();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 110dfb78a8..b68263d231 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,6 +45,7 @@
#include "hw/usb.h"
#include "hw/cpu/icc_bus.h"
#include "qemu/error-report.h"
+#include "migration/migration.h"
/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS 6
@@ -289,6 +290,7 @@ static void pc_q35_init(MachineState *machine)
static void pc_compat_2_3(MachineState *machine)
{
+ savevm_skip_section_footers();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fcb908061c..1efd98af63 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2561,6 +2561,7 @@ static const VMStateDescription vmstate_ide_atapi_gesn_state = {
.name ="ide_drive/atapi/gesn_state",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = ide_atapi_gesn_needed,
.fields = (VMStateField[]) {
VMSTATE_BOOL(events.new_media, IDEState),
VMSTATE_BOOL(events.eject_request, IDEState),
@@ -2572,6 +2573,7 @@ static const VMStateDescription vmstate_ide_tray_state = {
.name = "ide_drive/tray_state",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = ide_tray_state_needed,
.fields = (VMStateField[]) {
VMSTATE_BOOL(tray_open, IDEState),
VMSTATE_BOOL(tray_locked, IDEState),
@@ -2585,6 +2587,7 @@ static const VMStateDescription vmstate_ide_drive_pio_state = {
.minimum_version_id = 1,
.pre_save = ide_drive_pio_pre_save,
.post_load = ide_drive_pio_post_load,
+ .needed = ide_drive_pio_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(req_nb_sectors, IDEState),
VMSTATE_VARRAY_INT32(io_buffer, IDEState, io_buffer_total_len, 1,
@@ -2626,19 +2629,11 @@ const VMStateDescription vmstate_ide_drive = {
VMSTATE_UINT8_V(cdrom_changed, IDEState, 3),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_ide_drive_pio_state,
- .needed = ide_drive_pio_state_needed,
- }, {
- .vmsd = &vmstate_ide_tray_state,
- .needed = ide_tray_state_needed,
- }, {
- .vmsd = &vmstate_ide_atapi_gesn_state,
- .needed = ide_atapi_gesn_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ide_drive_pio_state,
+ &vmstate_ide_tray_state,
+ &vmstate_ide_atapi_gesn_state,
+ NULL
}
};
@@ -2646,6 +2641,7 @@ static const VMStateDescription vmstate_ide_error_status = {
.name ="ide_bus/error",
.version_id = 2,
.minimum_version_id = 1,
+ .needed = ide_error_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(error_status, IDEBus),
VMSTATE_INT64_V(retry_sector_num, IDEBus, 2),
@@ -2664,13 +2660,9 @@ const VMStateDescription vmstate_ide_bus = {
VMSTATE_UINT8(unit, IDEBus),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_ide_error_status,
- .needed = ide_error_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ide_error_status,
+ NULL
}
};
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 4b5e32dcbe..4afd0cfe8c 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -350,6 +350,7 @@ static const VMStateDescription vmstate_bmdma_current = {
.name = "ide bmdma_current",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = ide_bmdma_current_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT32(cur_addr, BMDMAState),
VMSTATE_UINT32(cur_prd_last, BMDMAState),
@@ -363,6 +364,7 @@ static const VMStateDescription vmstate_bmdma_status = {
.name ="ide bmdma/status",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = ide_bmdma_status_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(status, BMDMAState),
VMSTATE_END_OF_LIST()
@@ -383,16 +385,10 @@ static const VMStateDescription vmstate_bmdma = {
VMSTATE_UINT8(migration_retry_unit, BMDMAState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_bmdma_current,
- .needed = ide_bmdma_current_needed,
- }, {
- .vmsd = &vmstate_bmdma_status,
- .needed = ide_bmdma_status_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_bmdma_current,
+ &vmstate_bmdma_status,
+ NULL
}
};
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index 9b9a7d7a8a..ddac69df6f 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -391,23 +391,24 @@ static int kbd_outport_post_load(void *opaque, int version_id)
return 0;
}
+static bool kbd_outport_needed(void *opaque)
+{
+ KBDState *s = opaque;
+ return s->outport != kbd_outport_default(s);
+}
+
static const VMStateDescription vmstate_kbd_outport = {
.name = "pckbd_outport",
.version_id = 1,
.minimum_version_id = 1,
.post_load = kbd_outport_post_load,
+ .needed = kbd_outport_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(outport, KBDState),
VMSTATE_END_OF_LIST()
}
};
-static bool kbd_outport_needed(void *opaque)
-{
- KBDState *s = opaque;
- return s->outport != kbd_outport_default(s);
-}
-
static int kbd_post_load(void *opaque, int version_id)
{
KBDState *s = opaque;
@@ -430,12 +431,9 @@ static const VMStateDescription vmstate_kbd = {
VMSTATE_UINT8(pending, KBDState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_kbd_outport,
- .needed = kbd_outport_needed,
- },
- VMSTATE_END_OF_LIST()
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_kbd_outport,
+ NULL
}
};
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index 4baeea2b56..fdbe565e62 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -677,6 +677,7 @@ static const VMStateDescription vmstate_ps2_keyboard_ledstate = {
.version_id = 3,
.minimum_version_id = 2,
.post_load = ps2_kbd_ledstate_post_load,
+ .needed = ps2_keyboard_ledstate_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(ledstate, PS2KbdState),
VMSTATE_END_OF_LIST()
@@ -717,13 +718,9 @@ static const VMStateDescription vmstate_ps2_keyboard = {
VMSTATE_INT32_V(scancode_set, PS2KbdState,3),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_ps2_keyboard_ledstate,
- .needed = ps2_keyboard_ledstate_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ps2_keyboard_ledstate,
+ NULL
}
};
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d595d63a51..0032b97c5f 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -369,6 +369,7 @@ static const VMStateDescription vmstate_apic_common_sipi = {
.name = "apic_sipi",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = apic_common_sipi_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(sipi_vector, APICCommonState),
VMSTATE_INT32(wait_for_sipi, APICCommonState),
@@ -408,12 +409,9 @@ static const VMStateDescription vmstate_apic_common = {
APICCommonState), /* open-coded timer state */
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_apic_common_sipi,
- .needed = apic_common_sipi_needed,
- },
- VMSTATE_END_OF_LIST()
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_apic_common_sipi,
+ NULL
}
};
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 71a9f7a716..b3e0b1fd52 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -650,6 +650,7 @@ static const VMStateDescription vmstate_ich9_rst_cnt = {
.name = "ICH9LPC/rst_cnt",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = ich9_rst_cnt_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(rst_cnt, ICH9LPCState),
VMSTATE_END_OF_LIST()
@@ -669,12 +670,9 @@ static const VMStateDescription vmstate_ich9_lpc = {
VMSTATE_UINT32(sci_level, ICH9LPCState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_ich9_rst_cnt,
- .needed = ich9_rst_cnt_needed
- },
- { 0 }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ich9_rst_cnt,
+ NULL
}
};
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 091d61acc3..bab8e2abfb 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1370,6 +1370,7 @@ static const VMStateDescription vmstate_e1000_mit_state = {
.name = "e1000/mit_state",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = e1000_mit_state_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT32(mac_reg[RDTR], E1000State),
VMSTATE_UINT32(mac_reg[RADV], E1000State),
@@ -1457,13 +1458,9 @@ static const VMStateDescription vmstate_e1000 = {
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_e1000_mit_state,
- .needed = e1000_mit_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_e1000_mit_state,
+ NULL
}
};
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index f868108dfe..e0db4727ae 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3240,6 +3240,7 @@ static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
.name = "rtl8139/hotplug_ready",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = rtl8139_hotplug_ready_needed,
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
}
@@ -3335,13 +3336,9 @@ static const VMStateDescription vmstate_rtl8139 = {
VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_rtl8139_hotplug_ready,
- .needed = rtl8139_hotplug_ready_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_rtl8139_hotplug_ready,
+ NULL
}
};
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index dfb328debd..8bcdf3ed77 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2226,6 +2226,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
.version_id = 1,
.minimum_version_id = 1,
.pre_load = vmxnet3_mcast_list_pre_load,
+ .needed = vmxnet3_mc_list_needed,
.fields = (VMStateField[]) {
VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
mcast_list_buff_size),
@@ -2470,14 +2471,9 @@ static const VMStateDescription vmstate_vmxnet3 = {
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmxstate_vmxnet3_mcast_list,
- .needed = vmxnet3_mc_list_needed
- },
- {
- /* empty element. */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmxstate_vmxnet3_mcast_list,
+ NULL
}
};
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index f1712b86fe..ed2424c4cd 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -582,6 +582,7 @@ static const VMStateDescription vmstate_piix3_rcr = {
.name = "PIIX3/rcr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = piix3_rcr_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(rcr, PIIX3State),
VMSTATE_END_OF_LIST()
@@ -600,12 +601,9 @@ static const VMStateDescription vmstate_piix3 = {
PIIX_NUM_PIRQS, 3),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_piix3_rcr,
- .needed = piix3_rcr_needed,
- },
- { 0 }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_piix3_rcr,
+ NULL
}
};
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index bd2c0e4caa..f50b2f08af 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -1968,6 +1968,7 @@ static const VMStateDescription vmstate_scsi_sense_state = {
.name = "SCSIDevice/sense",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = scsi_sense_state_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8_SUB_ARRAY(sense, SCSIDevice,
SCSI_SENSE_BUF_SIZE_OLD,
@@ -1998,13 +1999,9 @@ const VMStateDescription vmstate_scsi_device = {
},
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_scsi_sense_state,
- .needed = scsi_sense_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_scsi_sense_state,
+ NULL
}
};
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index b6b8a2063d..b50071ef93 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -283,6 +283,7 @@ static const VMStateDescription vmstate_hpet_rtc_irq_level = {
.name = "hpet/rtc_irq_level",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = hpet_rtc_irq_level_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(rtc_irq_level, HPETState),
VMSTATE_END_OF_LIST()
@@ -322,13 +323,9 @@ static const VMStateDescription vmstate_hpet = {
vmstate_hpet_timer, HPETTimer),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_hpet_rtc_irq_level,
- .needed = hpet_rtc_irq_level_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_hpet_rtc_irq_level,
+ NULL
}
};
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index f2b77fa118..32048258c9 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -733,22 +733,23 @@ static int rtc_post_load(void *opaque, int version_id)
return 0;
}
+static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
+{
+ RTCState *s = (RTCState *)opaque;
+ return s->irq_reinject_on_ack_count != 0;
+}
+
static const VMStateDescription vmstate_rtc_irq_reinject_on_ack_count = {
.name = "mc146818rtc/irq_reinject_on_ack_count",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = rtc_irq_reinject_on_ack_count_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT16(irq_reinject_on_ack_count, RTCState),
VMSTATE_END_OF_LIST()
}
};
-static bool rtc_irq_reinject_on_ack_count_needed(void *opaque)
-{
- RTCState *s = (RTCState *)opaque;
- return s->irq_reinject_on_ack_count != 0;
-}
-
static const VMStateDescription vmstate_rtc = {
.name = "mc146818rtc",
.version_id = 3,
@@ -770,13 +771,9 @@ static const VMStateDescription vmstate_rtc = {
VMSTATE_UINT64_V(next_alarm_time, RTCState, 3),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_rtc_irq_reinject_on_ack_count,
- .needed = rtc_irq_reinject_on_ack_count_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_rtc_irq_reinject_on_ack_count,
+ NULL
}
};
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1a22c9c0cb..7d65818064 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -2034,6 +2034,7 @@ static const VMStateDescription vmstate_ohci_eof_timer = {
.version_id = 1,
.minimum_version_id = 1,
.pre_load = ohci_eof_timer_pre_load,
+ .needed = ohci_eof_timer_needed,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(eof_timer, OHCIState),
VMSTATE_END_OF_LIST()
@@ -2081,13 +2082,9 @@ static const VMStateDescription vmstate_ohci_state = {
VMSTATE_BOOL(async_complete, OHCIState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_ohci_eof_timer,
- .needed = ohci_eof_timer_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_ohci_eof_timer,
+ NULL
}
};
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 242a654583..6b4218c037 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -2257,40 +2257,42 @@ static const VMStateInfo usbredir_ep_bufpq_vmstate_info = {
/* For endp_data migration */
+static bool usbredir_bulk_receiving_needed(void *priv)
+{
+ struct endp_data *endp = priv;
+
+ return endp->bulk_receiving_started;
+}
+
static const VMStateDescription usbredir_bulk_receiving_vmstate = {
.name = "usb-redir-ep/bulk-receiving",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = usbredir_bulk_receiving_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(bulk_receiving_started, struct endp_data),
VMSTATE_END_OF_LIST()
}
};
-static bool usbredir_bulk_receiving_needed(void *priv)
+static bool usbredir_stream_needed(void *priv)
{
struct endp_data *endp = priv;
- return endp->bulk_receiving_started;
+ return endp->max_streams;
}
static const VMStateDescription usbredir_stream_vmstate = {
.name = "usb-redir-ep/stream-state",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = usbredir_stream_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT32(max_streams, struct endp_data),
VMSTATE_END_OF_LIST()
}
};
-static bool usbredir_stream_needed(void *priv)
-{
- struct endp_data *endp = priv;
-
- return endp->max_streams;
-}
-
static const VMStateDescription usbredir_ep_vmstate = {
.name = "usb-redir-ep",
.version_id = 1,
@@ -2318,16 +2320,10 @@ static const VMStateDescription usbredir_ep_vmstate = {
VMSTATE_INT32(bufpq_target_size, struct endp_data),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &usbredir_bulk_receiving_vmstate,
- .needed = usbredir_bulk_receiving_needed,
- }, {
- .vmsd = &usbredir_stream_vmstate,
- .needed = usbredir_stream_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &usbredir_bulk_receiving_vmstate,
+ &usbredir_stream_vmstate,
+ NULL
}
};
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fb49ffcb2d..ee4e07c5e7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1053,6 +1053,7 @@ static const VMStateDescription vmstate_virtio_device_endian = {
.name = "virtio/device_endian",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = &virtio_device_endian_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT8(device_endian, VirtIODevice),
VMSTATE_END_OF_LIST()
@@ -1063,6 +1064,7 @@ static const VMStateDescription vmstate_virtio_64bit_features = {
.name = "virtio/64bit_features",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = &virtio_64bit_features_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(guest_features, VirtIODevice),
VMSTATE_END_OF_LIST()
@@ -1077,16 +1079,10 @@ static const VMStateDescription vmstate_virtio = {
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_virtio_device_endian,
- .needed = &virtio_device_endian_needed
- },
- {
- .vmsd = &vmstate_virtio_64bit_features,
- .needed = &virtio_64bit_features_needed
- },
- { 0 }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_virtio_device_endian,
+ &vmstate_virtio_64bit_features,
+ NULL
}
};
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 43428bd030..de8a7200a9 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -126,10 +126,10 @@ void cpu_flush_icache_range(hwaddr start, int len);
extern struct MemoryRegion io_mem_rom;
extern struct MemoryRegion io_mem_notdirty;
-typedef void (RAMBlockIterFunc)(void *host_addr,
+typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
ram_addr_t offset, ram_addr_t length, void *opaque);
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index a6e025a248..9387c8c9d4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
#define QEMU_VM_SECTION_FULL 0x04
#define QEMU_VM_SUBSECTION 0x05
#define QEMU_VM_VMDESCRIPTION 0x06
+#define QEMU_VM_SECTION_FOOTER 0x7e
struct MigrationParams {
bool blk;
@@ -42,6 +43,20 @@ struct MigrationParams {
typedef struct MigrationState MigrationState;
+typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
+
+/* State for the incoming migration */
+struct MigrationIncomingState {
+ QEMUFile *file;
+
+ /* See savevm.c */
+ LoadStateEntry_Head loadvm_handlers;
+};
+
+MigrationIncomingState *migration_incoming_get_current(void);
+MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
+void migration_incoming_state_destroy(void);
+
struct MigrationState
{
int64_t bandwidth_limit;
@@ -180,4 +195,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
ram_addr_t offset, size_t size,
uint64_t *bytes_sent);
+void ram_mig_init(void);
+void savevm_skip_section_footers(void);
#endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a01c5b817e..4f67d79227 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -157,7 +157,7 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
void qemu_put_be16(QEMUFile *f, unsigned int v);
void qemu_put_be32(QEMUFile *f, unsigned int v);
void qemu_put_be64(QEMUFile *f, uint64_t v);
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset);
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset);
int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
int level);
@@ -312,4 +312,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
{
qemu_get_be64s(f, (uint64_t *)pv);
}
+
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256]);
+
#endif
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index bc7616aaa8..7153b1e145 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -120,11 +120,6 @@ typedef struct {
bool (*field_exists)(void *opaque, int version_id);
} VMStateField;
-typedef struct VMStateSubsection {
- const VMStateDescription *vmsd;
- bool (*needed)(void *opaque);
-} VMStateSubsection;
-
struct VMStateDescription {
const char *name;
int unmigratable;
@@ -135,8 +130,9 @@ struct VMStateDescription {
int (*pre_load)(void *opaque);
int (*post_load)(void *opaque, int version_id);
void (*pre_save)(void *opaque);
+ bool (*needed)(void *opaque);
VMStateField *fields;
- const VMStateSubsection *subsections;
+ const VMStateDescription **subsections;
};
extern const VMStateDescription vmstate_dummy;
@@ -812,6 +808,8 @@ extern const VMStateInfo vmstate_info_bitmap;
#define SELF_ANNOUNCE_ROUNDS 5
+void loadvm_free_handlers(MigrationIncomingState *mis);
+
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, int version_id);
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index cde3314896..6fdcbcd524 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -31,6 +31,7 @@ typedef struct I2CBus I2CBus;
typedef struct I2SCodec I2SCodec;
typedef struct ISABus ISABus;
typedef struct ISADevice ISADevice;
+typedef struct LoadStateEntry LoadStateEntry;
typedef struct MACAddr MACAddr;
typedef struct MachineClass MachineClass;
typedef struct MachineState MachineState;
@@ -38,6 +39,7 @@ typedef struct MemoryListener MemoryListener;
typedef struct MemoryMappingList MemoryMappingList;
typedef struct MemoryRegion MemoryRegion;
typedef struct MemoryRegionSection MemoryRegionSection;
+typedef struct MigrationIncomingState MigrationIncomingState;
typedef struct MigrationParams MigrationParams;
typedef struct Monitor Monitor;
typedef struct MouseTransformInfo MouseTransformInfo;
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 54b36c16c4..c38892fec6 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -30,7 +30,6 @@ extern const uint32_t arch_type;
void select_soundhw(const char *optarg);
void do_acpitable_option(const QemuOpts *opts);
void do_smbios_option(QemuOpts *opts);
-void ram_mig_init(void);
void cpudef_init(void);
void audio_init(void);
int kvm_available(void);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 853d90a317..ef793f702e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -84,6 +84,7 @@ void qemu_announce_self(void);
bool qemu_savevm_state_blocked(Error **errp);
void qemu_savevm_state_begin(QEMUFile *f,
const MigrationParams *params);
+void qemu_savevm_state_header(QEMUFile *f);
int qemu_savevm_state_iterate(QEMUFile *f);
void qemu_savevm_state_complete(QEMUFile *f);
void qemu_savevm_state_cancel(void);
diff --git a/migration/migration.c b/migration/migration.c
index 732d229708..b04b4571a8 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -53,6 +53,7 @@ static bool deferred_incoming;
migrations at once. For now we don't need to add
dynamic creation of migration */
+/* For outgoing */
MigrationState *migrate_get_current(void)
{
static MigrationState current_migration = {
@@ -71,6 +72,30 @@ MigrationState *migrate_get_current(void)
return &current_migration;
}
+/* For incoming */
+static MigrationIncomingState *mis_current;
+
+MigrationIncomingState *migration_incoming_get_current(void)
+{
+ return mis_current;
+}
+
+MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
+{
+ mis_current = g_malloc0(sizeof(MigrationIncomingState));
+ mis_current->file = f;
+ QLIST_INIT(&mis_current->loadvm_handlers);
+
+ return mis_current;
+}
+
+void migration_incoming_state_destroy(void)
+{
+ loadvm_free_handlers(mis_current);
+ g_free(mis_current);
+ mis_current = NULL;
+}
+
/*
* Called on -incoming with a defer: uri.
* The migration can be started later after any parameters have been
@@ -115,9 +140,14 @@ static void process_incoming_migration_co(void *opaque)
Error *local_err = NULL;
int ret;
+ migration_incoming_state_new(f);
+
ret = qemu_loadvm_state(f);
+
qemu_fclose(f);
free_xbzrle_decoded_buf();
+ migration_incoming_state_destroy();
+
if (ret < 0) {
error_report("load of migration failed: %s", strerror(-ret));
migrate_decompress_threads_join();
@@ -738,6 +768,7 @@ static void *migration_thread(void *opaque)
int64_t start_time = initial_time;
bool old_vm_running = false;
+ qemu_savevm_state_header(s->file);
qemu_savevm_state_begin(s->file, &s->params);
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
@@ -838,9 +869,6 @@ static void *migration_thread(void *opaque)
void migrate_fd_connect(MigrationState *s)
{
- s->state = MIGRATION_STATUS_SETUP;
- trace_migrate_set_state(MIGRATION_STATUS_SETUP);
-
/* This is a best 1st approximation. ns to ms */
s->expected_downtime = max_downtime/1000000;
s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2750365a7e..965a757772 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -349,14 +349,14 @@ void qemu_file_skip(QEMUFile *f, int size)
}
/*
- * Read 'size' bytes from file (at 'offset') into buf without moving the
- * pointer.
+ * Read 'size' bytes from file (at 'offset') without moving the
+ * pointer and set 'buf' to point to that data.
*
* It will return size bytes unless there was an error, in which case it will
* return as many as it managed to read (assuming blocking fd's which
* all current QEMUFile are)
*/
-int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
+int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset)
{
int pending;
int index;
@@ -392,7 +392,7 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset)
size = pending;
}
- memcpy(buf, f->buf + index, size);
+ *buf = f->buf + index;
return size;
}
@@ -411,11 +411,13 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
while (pending > 0) {
int res;
+ uint8_t *src;
- res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0);
+ res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
if (res == 0) {
return done;
}
+ memcpy(buf, src, res);
qemu_file_skip(f, res);
buf += res;
pending -= res;
@@ -585,3 +587,20 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src)
}
return len;
}
+
+/*
+ * Get a string whose length is determined by a single preceding byte
+ * A preallocated 256 byte buffer must be passed in.
+ * Returns: len on success and a 0 terminated string in the buffer
+ * else 0
+ * (Note a 0 length string will return 0 either way)
+ */
+size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
+{
+ size_t len = qemu_get_byte(f);
+ size_t res = qemu_get_buffer(f, (uint8_t *)buf, len);
+
+ buf[res] = 0;
+
+ return res == len ? res : 0;
+}
diff --git a/migration/ram.c b/migration/ram.c
new file mode 100644
index 0000000000..57368e1575
--- /dev/null
+++ b/migration/ram.c
@@ -0,0 +1,1628 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2011-2015 Red Hat Inc
+ *
+ * Authors:
+ * Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <zlib.h>
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "migration/migration.h"
+#include "exec/address-spaces.h"
+#include "migration/page_cache.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "exec/ram_addr.h"
+#include "qemu/rcu_queue.h"
+
+#ifdef DEBUG_MIGRATION_RAM
+#define DPRINTF(fmt, ...) \
+ do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
+
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
+
+static uint64_t bitmap_sync_count;
+
+/***********************************************************/
+/* ram save/restore */
+
+#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_COMPRESS 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE 0x08
+#define RAM_SAVE_FLAG_EOS 0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE 0x40
+/* 0x80 is reserved in migration.h start with 0x100 next */
+#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
+
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
+
+static inline bool is_zero_range(uint8_t *p, uint64_t size)
+{
+ return buffer_find_nonzero_offset(p, size) == size;
+}
+
+/* struct contains XBZRLE cache and a static page
+ used by the compression */
+static struct {
+ /* buffer used for XBZRLE encoding */
+ uint8_t *encoded_buf;
+ /* buffer for storing page content */
+ uint8_t *current_buf;
+ /* Cache for XBZRLE, Protected by lock. */
+ PageCache *cache;
+ QemuMutex lock;
+} XBZRLE;
+
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;
+
+static void XBZRLE_cache_lock(void)
+{
+ if (migrate_use_xbzrle())
+ qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+ if (migrate_use_xbzrle())
+ qemu_mutex_unlock(&XBZRLE.lock);
+}
+
+/*
+ * called from qmp_migrate_set_cache_size in main thread, possibly while
+ * a migration is in progress.
+ * A running migration maybe using the cache and might finish during this
+ * call, hence changes to the cache are protected by XBZRLE.lock().
+ */
+int64_t xbzrle_cache_resize(int64_t new_size)
+{
+ PageCache *new_cache;
+ int64_t ret;
+
+ if (new_size < TARGET_PAGE_SIZE) {
+ return -1;
+ }
+
+ XBZRLE_cache_lock();
+
+ if (XBZRLE.cache != NULL) {
+ if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+ goto out_new_size;
+ }
+ new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+ TARGET_PAGE_SIZE);
+ if (!new_cache) {
+ error_report("Error creating cache");
+ ret = -1;
+ goto out;
+ }
+
+ cache_fini(XBZRLE.cache);
+ XBZRLE.cache = new_cache;
+ }
+
+out_new_size:
+ ret = pow2floor(new_size);
+out:
+ XBZRLE_cache_unlock();
+ return ret;
+}
+
+/* accounting for migration statistics */
+typedef struct AccountingInfo {
+ uint64_t dup_pages;
+ uint64_t skipped_pages;
+ uint64_t norm_pages;
+ uint64_t iterations;
+ uint64_t xbzrle_bytes;
+ uint64_t xbzrle_pages;
+ uint64_t xbzrle_cache_miss;
+ double xbzrle_cache_miss_rate;
+ uint64_t xbzrle_overflows;
+} AccountingInfo;
+
+static AccountingInfo acct_info;
+
+static void acct_clear(void)
+{
+ memset(&acct_info, 0, sizeof(acct_info));
+}
+
+uint64_t dup_mig_bytes_transferred(void)
+{
+ return acct_info.dup_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t dup_mig_pages_transferred(void)
+{
+ return acct_info.dup_pages;
+}
+
+uint64_t skipped_mig_bytes_transferred(void)
+{
+ return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t skipped_mig_pages_transferred(void)
+{
+ return acct_info.skipped_pages;
+}
+
+uint64_t norm_mig_bytes_transferred(void)
+{
+ return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t norm_mig_pages_transferred(void)
+{
+ return acct_info.norm_pages;
+}
+
+uint64_t xbzrle_mig_bytes_transferred(void)
+{
+ return acct_info.xbzrle_bytes;
+}
+
+uint64_t xbzrle_mig_pages_transferred(void)
+{
+ return acct_info.xbzrle_pages;
+}
+
+uint64_t xbzrle_mig_pages_cache_miss(void)
+{
+ return acct_info.xbzrle_cache_miss;
+}
+
+double xbzrle_mig_cache_miss_rate(void)
+{
+ return acct_info.xbzrle_cache_miss_rate;
+}
+
+uint64_t xbzrle_mig_pages_overflow(void)
+{
+ return acct_info.xbzrle_overflows;
+}
+
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+struct CompressParam {
+ bool start;
+ bool done;
+ QEMUFile *file;
+ QemuMutex mutex;
+ QemuCond cond;
+ RAMBlock *block;
+ ram_addr_t offset;
+};
+typedef struct CompressParam CompressParam;
+
+struct DecompressParam {
+ bool start;
+ QemuMutex mutex;
+ QemuCond cond;
+ void *des;
+ uint8 *compbuf;
+ int len;
+};
+typedef struct DecompressParam DecompressParam;
+
+static CompressParam *comp_param;
+static QemuThread *compress_threads;
+/* comp_done_cond is used to wake up the migration thread when
+ * one of the compression threads has finished the compression.
+ * comp_done_lock is used to co-work with comp_done_cond.
+ */
+static QemuMutex *comp_done_lock;
+static QemuCond *comp_done_cond;
+/* The empty QEMUFileOps will be used by file in CompressParam */
+static const QEMUFileOps empty_ops = { };
+
+static bool compression_switch;
+static bool quit_comp_thread;
+static bool quit_decomp_thread;
+static DecompressParam *decomp_param;
+static QemuThread *decompress_threads;
+static uint8_t *compressed_data_buf;
+
+static int do_compress_ram_page(CompressParam *param);
+
+static void *do_data_compress(void *opaque)
+{
+ CompressParam *param = opaque;
+
+ while (!quit_comp_thread) {
+ qemu_mutex_lock(&param->mutex);
+ /* Re-check the quit_comp_thread in case of
+ * terminate_compression_threads is called just before
+ * qemu_mutex_lock(&param->mutex) and after
+ * while(!quit_comp_thread), re-check it here can make
+ * sure the compression thread terminate as expected.
+ */
+ while (!param->start && !quit_comp_thread) {
+ qemu_cond_wait(&param->cond, &param->mutex);
+ }
+ if (!quit_comp_thread) {
+ do_compress_ram_page(param);
+ }
+ param->start = false;
+ qemu_mutex_unlock(&param->mutex);
+
+ qemu_mutex_lock(comp_done_lock);
+ param->done = true;
+ qemu_cond_signal(comp_done_cond);
+ qemu_mutex_unlock(comp_done_lock);
+ }
+
+ return NULL;
+}
+
+static inline void terminate_compression_threads(void)
+{
+ int idx, thread_count;
+
+ thread_count = migrate_compress_threads();
+ quit_comp_thread = true;
+ for (idx = 0; idx < thread_count; idx++) {
+ qemu_mutex_lock(&comp_param[idx].mutex);
+ qemu_cond_signal(&comp_param[idx].cond);
+ qemu_mutex_unlock(&comp_param[idx].mutex);
+ }
+}
+
+void migrate_compress_threads_join(void)
+{
+ int i, thread_count;
+
+ if (!migrate_use_compression()) {
+ return;
+ }
+ terminate_compression_threads();
+ thread_count = migrate_compress_threads();
+ for (i = 0; i < thread_count; i++) {
+ qemu_thread_join(compress_threads + i);
+ qemu_fclose(comp_param[i].file);
+ qemu_mutex_destroy(&comp_param[i].mutex);
+ qemu_cond_destroy(&comp_param[i].cond);
+ }
+ qemu_mutex_destroy(comp_done_lock);
+ qemu_cond_destroy(comp_done_cond);
+ g_free(compress_threads);
+ g_free(comp_param);
+ g_free(comp_done_cond);
+ g_free(comp_done_lock);
+ compress_threads = NULL;
+ comp_param = NULL;
+ comp_done_cond = NULL;
+ comp_done_lock = NULL;
+}
+
+void migrate_compress_threads_create(void)
+{
+ int i, thread_count;
+
+ if (!migrate_use_compression()) {
+ return;
+ }
+ quit_comp_thread = false;
+ compression_switch = true;
+ thread_count = migrate_compress_threads();
+ compress_threads = g_new0(QemuThread, thread_count);
+ comp_param = g_new0(CompressParam, thread_count);
+ comp_done_cond = g_new0(QemuCond, 1);
+ comp_done_lock = g_new0(QemuMutex, 1);
+ qemu_cond_init(comp_done_cond);
+ qemu_mutex_init(comp_done_lock);
+ for (i = 0; i < thread_count; i++) {
+ /* com_param[i].file is just used as a dummy buffer to save data, set
+ * it's ops to empty.
+ */
+ comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
+ comp_param[i].done = true;
+ qemu_mutex_init(&comp_param[i].mutex);
+ qemu_cond_init(&comp_param[i].cond);
+ qemu_thread_create(compress_threads + i, "compress",
+ do_data_compress, comp_param + i,
+ QEMU_THREAD_JOINABLE);
+ }
+}
+
+/**
+ * save_page_header: Write page header to wire
+ *
+ * If this is the 1st block, it also writes the block identification
+ *
+ * Returns: Number of bytes written
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * in the lower bits, it contains flags
+ */
+static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+{
+ size_t size;
+
+ qemu_put_be64(f, offset);
+ size = 8;
+
+ if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
+ qemu_put_byte(f, strlen(block->idstr));
+ qemu_put_buffer(f, (uint8_t *)block->idstr,
+ strlen(block->idstr));
+ size += 1 + strlen(block->idstr);
+ }
+ return size;
+}
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+ if (ram_bulk_stage || !migrate_use_xbzrle()) {
+ return;
+ }
+
+ /* We don't care if this fails to allocate a new cache page
+ * as long as it updated an old one */
+ cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+ bitmap_sync_count);
+}
+
+#define ENCODING_FLAG_XBZRLE 0x1
+
+/**
+ * save_xbzrle_page: compress and send current page
+ *
+ * Returns: 1 means that we wrote the page
+ * 0 means that page is identical to the one already sent
+ * -1 means that xbzrle would be longer than normal
+ *
+ * @f: QEMUFile where to send the data
+ * @current_data:
+ * @current_addr:
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
+ ram_addr_t current_addr, RAMBlock *block,
+ ram_addr_t offset, bool last_stage,
+ uint64_t *bytes_transferred)
+{
+ int encoded_len = 0, bytes_xbzrle;
+ uint8_t *prev_cached_page;
+
+ if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
+ acct_info.xbzrle_cache_miss++;
+ if (!last_stage) {
+ if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+ bitmap_sync_count) == -1) {
+ return -1;
+ } else {
+ /* update *current_data when the page has been
+ inserted into cache */
+ *current_data = get_cached_data(XBZRLE.cache, current_addr);
+ }
+ }
+ return -1;
+ }
+
+ prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+ /* save current buffer into memory */
+ memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
+
+ /* XBZRLE encoding (if there is no overflow) */
+ encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+ TARGET_PAGE_SIZE);
+ if (encoded_len == 0) {
+ DPRINTF("Skipping unmodified page\n");
+ return 0;
+ } else if (encoded_len == -1) {
+ DPRINTF("Overflow\n");
+ acct_info.xbzrle_overflows++;
+ /* update data in the cache */
+ if (!last_stage) {
+ memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
+ *current_data = prev_cached_page;
+ }
+ return -1;
+ }
+
+ /* we need to update the data in the cache, in order to get the same data */
+ if (!last_stage) {
+ memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
+ }
+
+ /* Send XBZRLE based compressed page */
+ bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
+ qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
+ qemu_put_be16(f, encoded_len);
+ qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+ bytes_xbzrle += encoded_len + 1 + 2;
+ acct_info.xbzrle_pages++;
+ acct_info.xbzrle_bytes += bytes_xbzrle;
+ *bytes_transferred += bytes_xbzrle;
+
+ return 1;
+}
+
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+ ram_addr_t start)
+{
+ unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+ unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+ uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
+ unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+
+ unsigned long next;
+
+ if (ram_bulk_stage && nr > base) {
+ next = nr + 1;
+ } else {
+ next = find_next_bit(migration_bitmap, size, nr);
+ }
+
+ if (next < size) {
+ clear_bit(next, migration_bitmap);
+ migration_dirty_pages--;
+ }
+ return (next - base) << TARGET_PAGE_BITS;
+}
+
+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+ migration_dirty_pages +=
+ cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+}
+
+
+/* Fix me: there are too many global variables used in migration process. */
+static int64_t start_time;
+static int64_t bytes_xfer_prev;
+static int64_t num_dirty_pages_period;
+static uint64_t xbzrle_cache_miss_prev;
+static uint64_t iterations_prev;
+
+static void migration_bitmap_sync_init(void)
+{
+ start_time = 0;
+ bytes_xfer_prev = 0;
+ num_dirty_pages_period = 0;
+ xbzrle_cache_miss_prev = 0;
+ iterations_prev = 0;
+}
+
+/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
+static void migration_bitmap_sync(void)
+{
+ RAMBlock *block;
+ uint64_t num_dirty_pages_init = migration_dirty_pages;
+ MigrationState *s = migrate_get_current();
+ int64_t end_time;
+ int64_t bytes_xfer_now;
+
+ bitmap_sync_count++;
+
+ if (!bytes_xfer_prev) {
+ bytes_xfer_prev = ram_bytes_transferred();
+ }
+
+ if (!start_time) {
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ }
+
+ trace_migration_bitmap_sync_start();
+ address_space_sync_dirty_bitmap(&address_space_memory);
+
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
+ }
+ rcu_read_unlock();
+
+ trace_migration_bitmap_sync_end(migration_dirty_pages
+ - num_dirty_pages_init);
+ num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+ end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ /* more than 1 second = 1000 millisecons */
+ if (end_time > start_time + 1000) {
+ if (migrate_auto_converge()) {
+ /* The following detection logic can be refined later. For now:
+ Check to see if the dirtied bytes is 50% more than the approx.
+ amount of bytes that just got transferred since the last time we
+ were in this routine. If that happens >N times (for now N==4)
+ we turn on the throttle down logic */
+ bytes_xfer_now = ram_bytes_transferred();
+ if (s->dirty_pages_rate &&
+ (num_dirty_pages_period * TARGET_PAGE_SIZE >
+ (bytes_xfer_now - bytes_xfer_prev)/2) &&
+ (dirty_rate_high_cnt++ > 4)) {
+ trace_migration_throttle();
+ mig_throttle_on = true;
+ dirty_rate_high_cnt = 0;
+ }
+ bytes_xfer_prev = bytes_xfer_now;
+ } else {
+ mig_throttle_on = false;
+ }
+ if (migrate_use_xbzrle()) {
+ if (iterations_prev != acct_info.iterations) {
+ acct_info.xbzrle_cache_miss_rate =
+ (double)(acct_info.xbzrle_cache_miss -
+ xbzrle_cache_miss_prev) /
+ (acct_info.iterations - iterations_prev);
+ }
+ iterations_prev = acct_info.iterations;
+ xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
+ }
+ s->dirty_pages_rate = num_dirty_pages_period * 1000
+ / (end_time - start_time);
+ s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
+ start_time = end_time;
+ num_dirty_pages_period = 0;
+ }
+ s->dirty_sync_count = bitmap_sync_count;
+}
+
+/**
+ * save_zero_page: Send the zero page to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @p: pointer to the page
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+ uint8_t *p, uint64_t *bytes_transferred)
+{
+ int pages = -1;
+
+ if (is_zero_range(p, TARGET_PAGE_SIZE)) {
+ acct_info.dup_pages++;
+ *bytes_transferred += save_page_header(f, block,
+ offset | RAM_SAVE_FLAG_COMPRESS);
+ qemu_put_byte(f, 0);
+ *bytes_transferred += 1;
+ pages = 1;
+ }
+
+ return pages;
+}
+
+/**
+ * ram_save_page: Send the given page to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
+ bool last_stage, uint64_t *bytes_transferred)
+{
+ int pages = -1;
+ uint64_t bytes_xmit;
+ ram_addr_t current_addr;
+ MemoryRegion *mr = block->mr;
+ uint8_t *p;
+ int ret;
+ bool send_async = true;
+
+ p = memory_region_get_ram_ptr(mr) + offset;
+
+ /* In doubt sent page as normal */
+ bytes_xmit = 0;
+ ret = ram_control_save_page(f, block->offset,
+ offset, TARGET_PAGE_SIZE, &bytes_xmit);
+ if (bytes_xmit) {
+ *bytes_transferred += bytes_xmit;
+ pages = 1;
+ }
+
+ XBZRLE_cache_lock();
+
+ current_addr = block->offset + offset;
+
+ if (block == last_sent_block) {
+ offset |= RAM_SAVE_FLAG_CONTINUE;
+ }
+ if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+ if (ret != RAM_SAVE_CONTROL_DELAYED) {
+ if (bytes_xmit > 0) {
+ acct_info.norm_pages++;
+ } else if (bytes_xmit == 0) {
+ acct_info.dup_pages++;
+ }
+ }
+ } else {
+ pages = save_zero_page(f, block, offset, p, bytes_transferred);
+ if (pages > 0) {
+ /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+ * page would be stale
+ */
+ xbzrle_cache_zero_page(current_addr);
+ } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
+ pages = save_xbzrle_page(f, &p, current_addr, block,
+ offset, last_stage, bytes_transferred);
+ if (!last_stage) {
+ /* Can't send this cached data async, since the cache page
+ * might get updated before it gets to the wire
+ */
+ send_async = false;
+ }
+ }
+ }
+
+ /* XBZRLE overflow or normal page */
+ if (pages == -1) {
+ *bytes_transferred += save_page_header(f, block,
+ offset | RAM_SAVE_FLAG_PAGE);
+ if (send_async) {
+ qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ } else {
+ qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+ }
+ *bytes_transferred += TARGET_PAGE_SIZE;
+ pages = 1;
+ acct_info.norm_pages++;
+ }
+
+ XBZRLE_cache_unlock();
+
+ return pages;
+}
+
+static int do_compress_ram_page(CompressParam *param)
+{
+ int bytes_sent, blen;
+ uint8_t *p;
+ RAMBlock *block = param->block;
+ ram_addr_t offset = param->offset;
+
+ p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
+
+ bytes_sent = save_page_header(param->file, block, offset |
+ RAM_SAVE_FLAG_COMPRESS_PAGE);
+ blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
+ migrate_compress_level());
+ bytes_sent += blen;
+
+ return bytes_sent;
+}
+
+static inline void start_compression(CompressParam *param)
+{
+ param->done = false;
+ qemu_mutex_lock(&param->mutex);
+ param->start = true;
+ qemu_cond_signal(&param->cond);
+ qemu_mutex_unlock(&param->mutex);
+}
+
+static inline void start_decompression(DecompressParam *param)
+{
+ qemu_mutex_lock(&param->mutex);
+ param->start = true;
+ qemu_cond_signal(&param->cond);
+ qemu_mutex_unlock(&param->mutex);
+}
+
+static uint64_t bytes_transferred;
+
+static void flush_compressed_data(QEMUFile *f)
+{
+ int idx, len, thread_count;
+
+ if (!migrate_use_compression()) {
+ return;
+ }
+ thread_count = migrate_compress_threads();
+ for (idx = 0; idx < thread_count; idx++) {
+ if (!comp_param[idx].done) {
+ qemu_mutex_lock(comp_done_lock);
+ while (!comp_param[idx].done && !quit_comp_thread) {
+ qemu_cond_wait(comp_done_cond, comp_done_lock);
+ }
+ qemu_mutex_unlock(comp_done_lock);
+ }
+ if (!quit_comp_thread) {
+ len = qemu_put_qemu_file(f, comp_param[idx].file);
+ bytes_transferred += len;
+ }
+ }
+}
+
+static inline void set_compress_params(CompressParam *param, RAMBlock *block,
+ ram_addr_t offset)
+{
+ param->block = block;
+ param->offset = offset;
+}
+
+static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
+ ram_addr_t offset,
+ uint64_t *bytes_transferred)
+{
+ int idx, thread_count, bytes_xmit = -1, pages = -1;
+
+ thread_count = migrate_compress_threads();
+ qemu_mutex_lock(comp_done_lock);
+ while (true) {
+ for (idx = 0; idx < thread_count; idx++) {
+ if (comp_param[idx].done) {
+ bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
+ set_compress_params(&comp_param[idx], block, offset);
+ start_compression(&comp_param[idx]);
+ pages = 1;
+ acct_info.norm_pages++;
+ *bytes_transferred += bytes_xmit;
+ break;
+ }
+ }
+ if (pages > 0) {
+ break;
+ } else {
+ qemu_cond_wait(comp_done_cond, comp_done_lock);
+ }
+ }
+ qemu_mutex_unlock(comp_done_lock);
+
+ return pages;
+}
+
+/**
+ * ram_save_compressed_page: compress the given page and send it to the stream
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: block that contains the page we want to send
+ * @offset: offset inside the block for the page
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
+ ram_addr_t offset, bool last_stage,
+ uint64_t *bytes_transferred)
+{
+ int pages = -1;
+ uint64_t bytes_xmit;
+ MemoryRegion *mr = block->mr;
+ uint8_t *p;
+ int ret;
+
+ p = memory_region_get_ram_ptr(mr) + offset;
+
+ bytes_xmit = 0;
+ ret = ram_control_save_page(f, block->offset,
+ offset, TARGET_PAGE_SIZE, &bytes_xmit);
+ if (bytes_xmit) {
+ *bytes_transferred += bytes_xmit;
+ pages = 1;
+ }
+ if (block == last_sent_block) {
+ offset |= RAM_SAVE_FLAG_CONTINUE;
+ }
+ if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+ if (ret != RAM_SAVE_CONTROL_DELAYED) {
+ if (bytes_xmit > 0) {
+ acct_info.norm_pages++;
+ } else if (bytes_xmit == 0) {
+ acct_info.dup_pages++;
+ }
+ }
+ } else {
+ /* When starting the process of a new block, the first page of
+ * the block should be sent out before other pages in the same
+ * block, and all the pages in last block should have been sent
+ * out, keeping this order is important, because the 'cont' flag
+ * is used to avoid resending the block name.
+ */
+ if (block != last_sent_block) {
+ flush_compressed_data(f);
+ pages = save_zero_page(f, block, offset, p, bytes_transferred);
+ if (pages == -1) {
+ set_compress_params(&comp_param[0], block, offset);
+ /* Use the qemu thread to compress the data to make sure the
+ * first page is sent out before other pages
+ */
+ bytes_xmit = do_compress_ram_page(&comp_param[0]);
+ acct_info.norm_pages++;
+ qemu_put_qemu_file(f, comp_param[0].file);
+ *bytes_transferred += bytes_xmit;
+ pages = 1;
+ }
+ } else {
+ pages = save_zero_page(f, block, offset, p, bytes_transferred);
+ if (pages == -1) {
+ pages = compress_page_with_multi_thread(f, block, offset,
+ bytes_transferred);
+ }
+ }
+ }
+
+ return pages;
+}
+
+/**
+ * ram_find_and_save_block: Finds a dirty page and sends it to f
+ *
+ * Called within an RCU critical section.
+ *
+ * Returns: The number of pages written
+ * 0 means no dirty pages
+ *
+ * @f: QEMUFile where to send the data
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+
+static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
+ uint64_t *bytes_transferred)
+{
+ RAMBlock *block = last_seen_block;
+ ram_addr_t offset = last_offset;
+ bool complete_round = false;
+ int pages = 0;
+ MemoryRegion *mr;
+
+ if (!block)
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+
+ while (true) {
+ mr = block->mr;
+ offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+ if (complete_round && block == last_seen_block &&
+ offset >= last_offset) {
+ break;
+ }
+ if (offset >= block->used_length) {
+ offset = 0;
+ block = QLIST_NEXT_RCU(block, next);
+ if (!block) {
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
+ complete_round = true;
+ ram_bulk_stage = false;
+ if (migrate_use_xbzrle()) {
+ /* If xbzrle is on, stop using the data compression at this
+ * point. In theory, xbzrle can do better than compression.
+ */
+ flush_compressed_data(f);
+ compression_switch = false;
+ }
+ }
+ } else {
+ if (compression_switch && migrate_use_compression()) {
+ pages = ram_save_compressed_page(f, block, offset, last_stage,
+ bytes_transferred);
+ } else {
+ pages = ram_save_page(f, block, offset, last_stage,
+ bytes_transferred);
+ }
+
+ /* if page is unmodified, continue to the next */
+ if (pages > 0) {
+ last_sent_block = block;
+ break;
+ }
+ }
+ }
+
+ last_seen_block = block;
+ last_offset = offset;
+
+ return pages;
+}
+
+void acct_update_position(QEMUFile *f, size_t size, bool zero)
+{
+ uint64_t pages = size / TARGET_PAGE_SIZE;
+ if (zero) {
+ acct_info.dup_pages += pages;
+ } else {
+ acct_info.norm_pages += pages;
+ bytes_transferred += size;
+ qemu_update_position(f, size);
+ }
+}
+
+static ram_addr_t ram_save_remaining(void)
+{
+ return migration_dirty_pages;
+}
+
+uint64_t ram_bytes_remaining(void)
+{
+ return ram_save_remaining() * TARGET_PAGE_SIZE;
+}
+
+uint64_t ram_bytes_transferred(void)
+{
+ return bytes_transferred;
+}
+
+uint64_t ram_bytes_total(void)
+{
+ RAMBlock *block;
+ uint64_t total = 0;
+
+ rcu_read_lock();
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
+ total += block->used_length;
+ rcu_read_unlock();
+ return total;
+}
+
+void free_xbzrle_decoded_buf(void)
+{
+ g_free(xbzrle_decoded_buf);
+ xbzrle_decoded_buf = NULL;
+}
+
+static void migration_end(void)
+{
+ if (migration_bitmap) {
+ memory_global_dirty_log_stop();
+ g_free(migration_bitmap);
+ migration_bitmap = NULL;
+ }
+
+ XBZRLE_cache_lock();
+ if (XBZRLE.cache) {
+ cache_fini(XBZRLE.cache);
+ g_free(XBZRLE.encoded_buf);
+ g_free(XBZRLE.current_buf);
+ XBZRLE.cache = NULL;
+ XBZRLE.encoded_buf = NULL;
+ XBZRLE.current_buf = NULL;
+ }
+ XBZRLE_cache_unlock();
+}
+
+static void ram_migration_cancel(void *opaque)
+{
+ migration_end();
+}
+
+static void reset_ram_globals(void)
+{
+ last_seen_block = NULL;
+ last_sent_block = NULL;
+ last_offset = 0;
+ last_version = ram_list.version;
+ ram_bulk_stage = true;
+}
+
+#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section. When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+ RAMBlock *block;
+ int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
+
+ mig_throttle_on = false;
+ dirty_rate_high_cnt = 0;
+ bitmap_sync_count = 0;
+ migration_bitmap_sync_init();
+
+ if (migrate_use_xbzrle()) {
+ XBZRLE_cache_lock();
+ XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+ TARGET_PAGE_SIZE,
+ TARGET_PAGE_SIZE);
+ if (!XBZRLE.cache) {
+ XBZRLE_cache_unlock();
+ error_report("Error creating cache");
+ return -1;
+ }
+ XBZRLE_cache_unlock();
+
+ /* We prefer not to abort if there is no memory */
+ XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+ if (!XBZRLE.encoded_buf) {
+ error_report("Error allocating encoded_buf");
+ return -1;
+ }
+
+ XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+ if (!XBZRLE.current_buf) {
+ error_report("Error allocating current_buf");
+ g_free(XBZRLE.encoded_buf);
+ XBZRLE.encoded_buf = NULL;
+ return -1;
+ }
+
+ acct_clear();
+ }
+
+ /* iothread lock needed for ram_list.dirty_memory[] */
+ qemu_mutex_lock_iothread();
+ qemu_mutex_lock_ramlist();
+ rcu_read_lock();
+ bytes_transferred = 0;
+ reset_ram_globals();
+
+ ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+ migration_bitmap = bitmap_new(ram_bitmap_pages);
+ bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
+
+ /*
+ * Count the total number of pages used by ram blocks not including any
+ * gaps due to alignment or unplugs.
+ */
+ migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+
+ memory_global_dirty_log_start();
+ migration_bitmap_sync();
+ qemu_mutex_unlock_ramlist();
+ qemu_mutex_unlock_iothread();
+
+ qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ qemu_put_byte(f, strlen(block->idstr));
+ qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+ qemu_put_be64(f, block->used_length);
+ }
+
+ rcu_read_unlock();
+
+ ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+ ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+ return 0;
+}
+
+static int ram_save_iterate(QEMUFile *f, void *opaque)
+{
+ int ret;
+ int i;
+ int64_t t0;
+ int pages_sent = 0;
+
+ rcu_read_lock();
+ if (ram_list.version != last_version) {
+ reset_ram_globals();
+ }
+
+ /* Read version before ram_list.blocks */
+ smp_rmb();
+
+ ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
+ t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ i = 0;
+ while ((ret = qemu_file_rate_limit(f)) == 0) {
+ int pages;
+
+ pages = ram_find_and_save_block(f, false, &bytes_transferred);
+ /* no more pages to sent */
+ if (pages == 0) {
+ break;
+ }
+ pages_sent += pages;
+ acct_info.iterations++;
+ check_guest_throttling();
+ /* we want to check in the 1st loop, just in case it was the 1st time
+ and we had to sync the dirty bitmap.
+ qemu_get_clock_ns() is a bit expensive, so we only check each some
+ iterations
+ */
+ if ((i & 63) == 0) {
+ uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
+ if (t1 > MAX_WAIT) {
+ DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
+ t1, i);
+ break;
+ }
+ }
+ i++;
+ }
+ flush_compressed_data(f);
+ rcu_read_unlock();
+
+ /*
+ * Must occur before EOS (or any QEMUFile operation)
+ * because of RDMA protocol.
+ */
+ ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+ bytes_transferred += 8;
+
+ ret = qemu_file_get_error(f);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return pages_sent;
+}
+
+/* Called with iothread lock */
+static int ram_save_complete(QEMUFile *f, void *opaque)
+{
+ rcu_read_lock();
+
+ migration_bitmap_sync();
+
+ ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
+ /* try transferring iterative blocks of memory */
+
+ /* flush all remaining blocks regardless of rate limiting */
+ while (true) {
+ int pages;
+
+ pages = ram_find_and_save_block(f, true, &bytes_transferred);
+ /* no more blocks to sent */
+ if (pages == 0) {
+ break;
+ }
+ }
+
+ flush_compressed_data(f);
+ ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+ migration_end();
+
+ rcu_read_unlock();
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+ return 0;
+}
+
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+ uint64_t remaining_size;
+
+ remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+
+ if (remaining_size < max_size) {
+ qemu_mutex_lock_iothread();
+ rcu_read_lock();
+ migration_bitmap_sync();
+ rcu_read_unlock();
+ qemu_mutex_unlock_iothread();
+ remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+ }
+ return remaining_size;
+}
+
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+ unsigned int xh_len;
+ int xh_flags;
+
+ if (!xbzrle_decoded_buf) {
+ xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+ }
+
+ /* extract RLE header */
+ xh_flags = qemu_get_byte(f);
+ xh_len = qemu_get_be16(f);
+
+ if (xh_flags != ENCODING_FLAG_XBZRLE) {
+ error_report("Failed to load XBZRLE page - wrong compression!");
+ return -1;
+ }
+
+ if (xh_len > TARGET_PAGE_SIZE) {
+ error_report("Failed to load XBZRLE page - len overflow!");
+ return -1;
+ }
+ /* load data and decode */
+ qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
+
+ /* decode RLE */
+ if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
+ TARGET_PAGE_SIZE) == -1) {
+ error_report("Failed to load XBZRLE page - decode error!");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Must be called from within a rcu critical section.
+ * Returns a pointer from within the RCU-protected ram_list.
+ */
+static inline void *host_from_stream_offset(QEMUFile *f,
+ ram_addr_t offset,
+ int flags)
+{
+ static RAMBlock *block = NULL;
+ char id[256];
+ uint8_t len;
+
+ if (flags & RAM_SAVE_FLAG_CONTINUE) {
+ if (!block || block->max_length <= offset) {
+ error_report("Ack, bad migration stream!");
+ return NULL;
+ }
+
+ return memory_region_get_ram_ptr(block->mr) + offset;
+ }
+
+ len = qemu_get_byte(f);
+ qemu_get_buffer(f, (uint8_t *)id, len);
+ id[len] = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ if (!strncmp(id, block->idstr, sizeof(id)) &&
+ block->max_length > offset) {
+ return memory_region_get_ram_ptr(block->mr) + offset;
+ }
+ }
+
+ error_report("Can't find block %s!", id);
+ return NULL;
+}
+
+/*
+ * If a page (or a whole RDMA chunk) has been
+ * determined to be zero, then zap it.
+ */
+void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+{
+ if (ch != 0 || !is_zero_range(host, size)) {
+ memset(host, ch, size);
+ }
+}
+
+static void *do_data_decompress(void *opaque)
+{
+ DecompressParam *param = opaque;
+ unsigned long pagesize;
+
+ while (!quit_decomp_thread) {
+ qemu_mutex_lock(&param->mutex);
+ while (!param->start && !quit_decomp_thread) {
+ qemu_cond_wait(&param->cond, &param->mutex);
+ pagesize = TARGET_PAGE_SIZE;
+ if (!quit_decomp_thread) {
+ /* uncompress() will return failed in some case, especially
+ * when the page is dirted when doing the compression, it's
+ * not a problem because the dirty page will be retransferred
+ * and uncompress() won't break the data in other pages.
+ */
+ uncompress((Bytef *)param->des, &pagesize,
+ (const Bytef *)param->compbuf, param->len);
+ }
+ param->start = false;
+ }
+ qemu_mutex_unlock(&param->mutex);
+ }
+
+ return NULL;
+}
+
+void migrate_decompress_threads_create(void)
+{
+ int i, thread_count;
+
+ thread_count = migrate_decompress_threads();
+ decompress_threads = g_new0(QemuThread, thread_count);
+ decomp_param = g_new0(DecompressParam, thread_count);
+ compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+ quit_decomp_thread = false;
+ for (i = 0; i < thread_count; i++) {
+ qemu_mutex_init(&decomp_param[i].mutex);
+ qemu_cond_init(&decomp_param[i].cond);
+ decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
+ qemu_thread_create(decompress_threads + i, "decompress",
+ do_data_decompress, decomp_param + i,
+ QEMU_THREAD_JOINABLE);
+ }
+}
+
+void migrate_decompress_threads_join(void)
+{
+ int i, thread_count;
+
+ quit_decomp_thread = true;
+ thread_count = migrate_decompress_threads();
+ for (i = 0; i < thread_count; i++) {
+ qemu_mutex_lock(&decomp_param[i].mutex);
+ qemu_cond_signal(&decomp_param[i].cond);
+ qemu_mutex_unlock(&decomp_param[i].mutex);
+ }
+ for (i = 0; i < thread_count; i++) {
+ qemu_thread_join(decompress_threads + i);
+ qemu_mutex_destroy(&decomp_param[i].mutex);
+ qemu_cond_destroy(&decomp_param[i].cond);
+ g_free(decomp_param[i].compbuf);
+ }
+ g_free(decompress_threads);
+ g_free(decomp_param);
+ g_free(compressed_data_buf);
+ decompress_threads = NULL;
+ decomp_param = NULL;
+ compressed_data_buf = NULL;
+}
+
+static void decompress_data_with_multi_threads(uint8_t *compbuf,
+ void *host, int len)
+{
+ int idx, thread_count;
+
+ thread_count = migrate_decompress_threads();
+ while (true) {
+ for (idx = 0; idx < thread_count; idx++) {
+ if (!decomp_param[idx].start) {
+ memcpy(decomp_param[idx].compbuf, compbuf, len);
+ decomp_param[idx].des = host;
+ decomp_param[idx].len = len;
+ start_decompression(&decomp_param[idx]);
+ break;
+ }
+ }
+ if (idx < thread_count) {
+ break;
+ }
+ }
+}
+
+static int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+ int flags = 0, ret = 0;
+ static uint64_t seq_iter;
+ int len = 0;
+
+ seq_iter++;
+
+ if (version_id != 4) {
+ ret = -EINVAL;
+ }
+
+ /* This RCU critical section can be very long running.
+ * When RCU reclaims in the code start to become numerous,
+ * it will be necessary to reduce the granularity of this
+ * critical section.
+ */
+ rcu_read_lock();
+ while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
+ ram_addr_t addr, total_ram_bytes;
+ void *host;
+ uint8_t ch;
+
+ addr = qemu_get_be64(f);
+ flags = addr & ~TARGET_PAGE_MASK;
+ addr &= TARGET_PAGE_MASK;
+
+ switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
+ case RAM_SAVE_FLAG_MEM_SIZE:
+ /* Synchronize RAM block list */
+ total_ram_bytes = addr;
+ while (!ret && total_ram_bytes) {
+ RAMBlock *block;
+ char id[256];
+ ram_addr_t length;
+
+ len = qemu_get_byte(f);
+ qemu_get_buffer(f, (uint8_t *)id, len);
+ id[len] = 0;
+ length = qemu_get_be64(f);
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ if (!strncmp(id, block->idstr, sizeof(id))) {
+ if (length != block->used_length) {
+ Error *local_err = NULL;
+
+ ret = qemu_ram_resize(block->offset, length, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ }
+ break;
+ }
+ }
+
+ if (!block) {
+ error_report("Unknown ramblock \"%s\", cannot "
+ "accept migration", id);
+ ret = -EINVAL;
+ }
+
+ total_ram_bytes -= length;
+ }
+ break;
+ case RAM_SAVE_FLAG_COMPRESS:
+ host = host_from_stream_offset(f, addr, flags);
+ if (!host) {
+ error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+ ret = -EINVAL;
+ break;
+ }
+ ch = qemu_get_byte(f);
+ ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+ break;
+ case RAM_SAVE_FLAG_PAGE:
+ host = host_from_stream_offset(f, addr, flags);
+ if (!host) {
+ error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+ ret = -EINVAL;
+ break;
+ }
+ qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+ break;
+ case RAM_SAVE_FLAG_COMPRESS_PAGE:
+ host = host_from_stream_offset(f, addr, flags);
+ if (!host) {
+ error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
+ ret = -EINVAL;
+ break;
+ }
+
+ len = qemu_get_be32(f);
+ if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+ error_report("Invalid compressed data length: %d", len);
+ ret = -EINVAL;
+ break;
+ }
+ qemu_get_buffer(f, compressed_data_buf, len);
+ decompress_data_with_multi_threads(compressed_data_buf, host, len);
+ break;
+ case RAM_SAVE_FLAG_XBZRLE:
+ host = host_from_stream_offset(f, addr, flags);
+ if (!host) {
+ error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+ ret = -EINVAL;
+ break;
+ }
+ if (load_xbzrle(f, addr, host) < 0) {
+ error_report("Failed to decompress XBZRLE page at "
+ RAM_ADDR_FMT, addr);
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ case RAM_SAVE_FLAG_EOS:
+ /* normal exit */
+ break;
+ default:
+ if (flags & RAM_SAVE_FLAG_HOOK) {
+ ram_control_load_hook(f, flags);
+ } else {
+ error_report("Unknown combination of migration flags: %#x",
+ flags);
+ ret = -EINVAL;
+ }
+ }
+ if (!ret) {
+ ret = qemu_file_get_error(f);
+ }
+ }
+
+ rcu_read_unlock();
+ DPRINTF("Completed load of VM with exit code %d seq iteration "
+ "%" PRIu64 "\n", ret, seq_iter);
+ return ret;
+}
+
+static SaveVMHandlers savevm_ram_handlers = {
+ .save_live_setup = ram_save_setup,
+ .save_live_iterate = ram_save_iterate,
+ .save_live_complete = ram_save_complete,
+ .save_live_pending = ram_save_pending,
+ .load_state = ram_load,
+ .cancel = ram_migration_cancel,
+};
+
+void ram_mig_init(void)
+{
+ qemu_mutex_init(&XBZRLE.lock);
+ register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
+}
+/* Stub function that's gets run on the vcpu when its brought out of the
+ VM to run inside qemu via async_run_on_cpu()*/
+
+static void mig_sleep_cpu(void *opq)
+{
+ qemu_mutex_unlock_iothread();
+ g_usleep(30*1000);
+ qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+ much time in the VM. The migration thread will try to catchup.
+ Workload will experience a performance drop.
+*/
+static void mig_throttle_guest_down(void)
+{
+ CPUState *cpu;
+
+ qemu_mutex_lock_iothread();
+ CPU_FOREACH(cpu) {
+ async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+ static int64_t t0;
+ int64_t t1;
+
+ if (!mig_throttle_on) {
+ return;
+ }
+
+ if (!t0) {
+ t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ return;
+ }
+
+ t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+ /* If it has been more than 40 ms since the last time the guest
+ * was throttled then do it again.
+ */
+ if (40 < (t1-t0)/1000000) {
+ mig_throttle_guest_down();
+ t0 = t1;
+ }
+}
diff --git a/migration/rdma.c b/migration/rdma.c
index 77e34441dc..48b3e64b34 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -236,13 +236,13 @@ typedef struct RDMALocalBlock {
* corresponding RDMALocalBlock with
* the information needed to perform the actual RDMA.
*/
-typedef struct QEMU_PACKED RDMARemoteBlock {
+typedef struct QEMU_PACKED RDMADestBlock {
uint64_t remote_host_addr;
uint64_t offset;
uint64_t length;
uint32_t remote_rkey;
uint32_t padding;
-} RDMARemoteBlock;
+} RDMADestBlock;
static uint64_t htonll(uint64_t v)
{
@@ -258,20 +258,20 @@ static uint64_t ntohll(uint64_t v) {
return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
}
-static void remote_block_to_network(RDMARemoteBlock *rb)
+static void dest_block_to_network(RDMADestBlock *db)
{
- rb->remote_host_addr = htonll(rb->remote_host_addr);
- rb->offset = htonll(rb->offset);
- rb->length = htonll(rb->length);
- rb->remote_rkey = htonl(rb->remote_rkey);
+ db->remote_host_addr = htonll(db->remote_host_addr);
+ db->offset = htonll(db->offset);
+ db->length = htonll(db->length);
+ db->remote_rkey = htonl(db->remote_rkey);
}
-static void network_to_remote_block(RDMARemoteBlock *rb)
+static void network_to_dest_block(RDMADestBlock *db)
{
- rb->remote_host_addr = ntohll(rb->remote_host_addr);
- rb->offset = ntohll(rb->offset);
- rb->length = ntohll(rb->length);
- rb->remote_rkey = ntohl(rb->remote_rkey);
+ db->remote_host_addr = ntohll(db->remote_host_addr);
+ db->offset = ntohll(db->offset);
+ db->length = ntohll(db->length);
+ db->remote_rkey = ntohl(db->remote_rkey);
}
/*
@@ -350,7 +350,7 @@ typedef struct RDMAContext {
* Description of ram blocks used throughout the code.
*/
RDMALocalBlocks local_ram_blocks;
- RDMARemoteBlock *block;
+ RDMADestBlock *dest_blocks;
/*
* Migration on *destination* started.
@@ -570,10 +570,10 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
* in advanced before the migration starts. This tells us where the RAM blocks
* are so that we can register them individually.
*/
-static void qemu_rdma_init_one_block(void *host_addr,
+static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
ram_addr_t block_offset, ram_addr_t length, void *opaque)
{
- rdma_add_block(opaque, host_addr, block_offset, length);
+ return rdma_add_block(opaque, host_addr, block_offset, length);
}
/*
@@ -590,7 +590,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
memset(local, 0, sizeof *local);
qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
- rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) *
+ rdma->dest_blocks = (RDMADestBlock *) g_malloc0(sizeof(RDMADestBlock) *
rdma->local_ram_blocks.nb_blocks);
local->init = true;
return 0;
@@ -790,6 +790,13 @@ static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
for (x = 0; x < num_devices; x++) {
verbs = ibv_open_device(dev_list[x]);
+ if (!verbs) {
+ if (errno == EPERM) {
+ continue;
+ } else {
+ return -EINVAL;
+ }
+ }
if (ibv_query_port(verbs, 1, &port_attr)) {
ibv_close_device(verbs);
@@ -2177,8 +2184,8 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
rdma->connected = false;
}
- g_free(rdma->block);
- rdma->block = NULL;
+ g_free(rdma->dest_blocks);
+ rdma->dest_blocks = NULL;
for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
if (rdma->wr_data[idx].control_mr) {
@@ -2445,7 +2452,6 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
if (host_port) {
rdma = g_malloc0(sizeof(RDMAContext));
- memset(rdma, 0, sizeof(RDMAContext));
rdma->current_index = -1;
rdma->current_chunk = -1;
@@ -2967,25 +2973,25 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
* their "local" descriptions with what was sent.
*/
for (i = 0; i < local->nb_blocks; i++) {
- rdma->block[i].remote_host_addr =
+ rdma->dest_blocks[i].remote_host_addr =
(uintptr_t)(local->block[i].local_host_addr);
if (rdma->pin_all) {
- rdma->block[i].remote_rkey = local->block[i].mr->rkey;
+ rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey;
}
- rdma->block[i].offset = local->block[i].offset;
- rdma->block[i].length = local->block[i].length;
+ rdma->dest_blocks[i].offset = local->block[i].offset;
+ rdma->dest_blocks[i].length = local->block[i].length;
- remote_block_to_network(&rdma->block[i]);
+ dest_block_to_network(&rdma->dest_blocks[i]);
}
blocks.len = rdma->local_ram_blocks.nb_blocks
- * sizeof(RDMARemoteBlock);
+ * sizeof(RDMADestBlock);
ret = qemu_rdma_post_send_control(rdma,
- (uint8_t *) rdma->block, &blocks);
+ (uint8_t *) rdma->dest_blocks, &blocks);
if (ret < 0) {
error_report("rdma migration: error sending remote info");
@@ -3141,7 +3147,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
if (flags == RAM_CONTROL_SETUP) {
RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- int reg_result_idx, i, j, nb_remote_blocks;
+ int reg_result_idx, i, j, nb_dest_blocks;
head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
trace_qemu_rdma_registration_stop_ram();
@@ -3162,7 +3168,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
return ret;
}
- nb_remote_blocks = resp.len / sizeof(RDMARemoteBlock);
+ nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
/*
* The protocol uses two different sets of rkeys (mutually exclusive):
@@ -3176,7 +3182,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
* and then propagates the remote ram block descriptions to his local copy.
*/
- if (local->nb_blocks != nb_remote_blocks) {
+ if (local->nb_blocks != nb_dest_blocks) {
ERROR(errp, "ram blocks mismatch #1! "
"Your QEMU command line parameters are probably "
"not identical on both the source and destination.");
@@ -3184,26 +3190,26 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
}
qemu_rdma_move_header(rdma, reg_result_idx, &resp);
- memcpy(rdma->block,
+ memcpy(rdma->dest_blocks,
rdma->wr_data[reg_result_idx].control_curr, resp.len);
- for (i = 0; i < nb_remote_blocks; i++) {
- network_to_remote_block(&rdma->block[i]);
+ for (i = 0; i < nb_dest_blocks; i++) {
+ network_to_dest_block(&rdma->dest_blocks[i]);
/* search local ram blocks */
for (j = 0; j < local->nb_blocks; j++) {
- if (rdma->block[i].offset != local->block[j].offset) {
+ if (rdma->dest_blocks[i].offset != local->block[j].offset) {
continue;
}
- if (rdma->block[i].length != local->block[j].length) {
+ if (rdma->dest_blocks[i].length != local->block[j].length) {
ERROR(errp, "ram blocks mismatch #2! "
"Your QEMU command line parameters are probably "
"not identical on both the source and destination.");
return -EINVAL;
}
local->block[j].remote_host_addr =
- rdma->block[i].remote_host_addr;
- local->block[j].remote_rkey = rdma->block[i].remote_rkey;
+ rdma->dest_blocks[i].remote_host_addr;
+ local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
break;
}
diff --git a/savevm.c b/migration/savevm.c
index 3b0e222cb3..2091882196 100644
--- a/savevm.c
+++ b/migration/savevm.c
@@ -2,6 +2,10 @@
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009-2015 Red Hat Inc
+ *
+ * Authors:
+ * Juan Quintela <quintela@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -51,6 +55,8 @@
#define ARP_PTYPE_IP 0x0800
#define ARP_OP_REQUEST_REV 0x3
+static bool skip_section_footers;
+
static int announce_self_create(uint8_t *buf,
uint8_t *mac_addr)
{
@@ -235,10 +241,15 @@ typedef struct SaveStateEntry {
int is_ram;
} SaveStateEntry;
+typedef struct SaveState {
+ QTAILQ_HEAD(, SaveStateEntry) handlers;
+ int global_section_id;
+} SaveState;
-static QTAILQ_HEAD(savevm_handlers, SaveStateEntry) savevm_handlers =
- QTAILQ_HEAD_INITIALIZER(savevm_handlers);
-static int global_section_id;
+static SaveState savevm_state = {
+ .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+ .global_section_id = 0,
+};
static void dump_vmstate_vmsd(FILE *out_file,
const VMStateDescription *vmsd, int indent,
@@ -263,11 +274,11 @@ static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
}
static void dump_vmstate_vmss(FILE *out_file,
- const VMStateSubsection *subsection,
+ const VMStateDescription **subsection,
int indent)
{
- if (subsection->vmsd != NULL) {
- dump_vmstate_vmsd(out_file, subsection->vmsd, indent, true);
+ if (*subsection != NULL) {
+ dump_vmstate_vmsd(out_file, *subsection, indent, true);
}
}
@@ -308,12 +319,12 @@ static void dump_vmstate_vmsd(FILE *out_file,
fprintf(out_file, "\n%*s]", indent, "");
}
if (vmsd->subsections != NULL) {
- const VMStateSubsection *subsection = vmsd->subsections;
+ const VMStateDescription **subsection = vmsd->subsections;
bool first;
fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
first = true;
- while (subsection->vmsd != NULL) {
+ while (*subsection != NULL) {
if (!first) {
fprintf(out_file, ",\n");
}
@@ -383,7 +394,7 @@ static int calculate_new_instance_id(const char *idstr)
SaveStateEntry *se;
int instance_id = 0;
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (strcmp(idstr, se->idstr) == 0
&& instance_id <= se->instance_id) {
instance_id = se->instance_id + 1;
@@ -397,7 +408,7 @@ static int calculate_compat_instance_id(const char *idstr)
SaveStateEntry *se;
int instance_id = 0;
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->compat) {
continue;
}
@@ -425,7 +436,7 @@ int register_savevm_live(DeviceState *dev,
se = g_malloc0(sizeof(SaveStateEntry));
se->version_id = version_id;
- se->section_id = global_section_id++;
+ se->section_id = savevm_state.global_section_id++;
se->ops = ops;
se->opaque = opaque;
se->vmsd = NULL;
@@ -457,7 +468,7 @@ int register_savevm_live(DeviceState *dev,
}
assert(!se->compat || se->instance_id == 0);
/* add at the end of list */
- QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+ QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
return 0;
}
@@ -491,9 +502,9 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
}
pstrcat(id, sizeof(id), idstr);
- QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+ QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
- QTAILQ_REMOVE(&savevm_handlers, se, entry);
+ QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
if (se->compat) {
g_free(se->compat);
}
@@ -515,7 +526,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
se = g_malloc0(sizeof(SaveStateEntry));
se->version_id = vmsd->version_id;
- se->section_id = global_section_id++;
+ se->section_id = savevm_state.global_section_id++;
se->opaque = opaque;
se->vmsd = vmsd;
se->alias_id = alias_id;
@@ -543,7 +554,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
}
assert(!se->compat || se->instance_id == 0);
/* add at the end of list */
- QTAILQ_INSERT_TAIL(&savevm_handlers, se, entry);
+ QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
return 0;
}
@@ -552,9 +563,9 @@ void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
{
SaveStateEntry *se, *new_se;
- QTAILQ_FOREACH_SAFE(se, &savevm_handlers, entry, new_se) {
+ QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
if (se->vmsd == vmsd && se->opaque == opaque) {
- QTAILQ_REMOVE(&savevm_handlers, se, entry);
+ QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
if (se->compat) {
g_free(se->compat);
}
@@ -602,11 +613,84 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
}
+void savevm_skip_section_footers(void)
+{
+ skip_section_footers = true;
+}
+
+/*
+ * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
+ */
+static void save_section_header(QEMUFile *f, SaveStateEntry *se,
+ uint8_t section_type)
+{
+ qemu_put_byte(f, section_type);
+ qemu_put_be32(f, se->section_id);
+
+ if (section_type == QEMU_VM_SECTION_FULL ||
+ section_type == QEMU_VM_SECTION_START) {
+ /* ID string */
+ size_t len = strlen(se->idstr);
+ qemu_put_byte(f, len);
+ qemu_put_buffer(f, (uint8_t *)se->idstr, len);
+
+ qemu_put_be32(f, se->instance_id);
+ qemu_put_be32(f, se->version_id);
+ }
+}
+
+/*
+ * Write a footer onto device sections that catches cases misformatted device
+ * sections.
+ */
+static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+ if (!skip_section_footers) {
+ qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
+ qemu_put_be32(f, se->section_id);
+ }
+}
+
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ * false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
+{
+ uint8_t read_mark;
+ uint32_t read_section_id;
+
+ if (skip_section_footers) {
+ /* No footer to check */
+ return true;
+ }
+
+ read_mark = qemu_get_byte(f);
+
+ if (read_mark != QEMU_VM_SECTION_FOOTER) {
+ error_report("Missing section footer for %s", se->idstr);
+ return false;
+ }
+
+ read_section_id = qemu_get_be32(f);
+ if (read_section_id != se->section_id) {
+ error_report("Mismatched section id in footer for %s -"
+ " read 0x%x expected 0x%x",
+ se->idstr, read_section_id, se->section_id);
+ return false;
+ }
+
+ /* All good */
+ return true;
+}
+
bool qemu_savevm_state_blocked(Error **errp)
{
SaveStateEntry *se;
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (se->vmsd && se->vmsd->unmigratable) {
error_setg(errp, "State blocked by non-migratable device '%s'",
se->idstr);
@@ -616,6 +700,13 @@ bool qemu_savevm_state_blocked(Error **errp)
return false;
}
+void qemu_savevm_state_header(QEMUFile *f)
+{
+ trace_savevm_state_header();
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+}
+
void qemu_savevm_state_begin(QEMUFile *f,
const MigrationParams *params)
{
@@ -623,19 +714,14 @@ void qemu_savevm_state_begin(QEMUFile *f,
int ret;
trace_savevm_state_begin();
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->set_params) {
continue;
}
se->ops->set_params(params, se->opaque);
}
- qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
- qemu_put_be32(f, QEMU_VM_FILE_VERSION);
-
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
- int len;
-
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_setup) {
continue;
}
@@ -644,19 +730,10 @@ void qemu_savevm_state_begin(QEMUFile *f,
continue;
}
}
- /* Section type */
- qemu_put_byte(f, QEMU_VM_SECTION_START);
- qemu_put_be32(f, se->section_id);
-
- /* ID string */
- len = strlen(se->idstr);
- qemu_put_byte(f, len);
- qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
- qemu_put_be32(f, se->instance_id);
- qemu_put_be32(f, se->version_id);
+ save_section_header(f, se, QEMU_VM_SECTION_START);
ret = se->ops->save_live_setup(f, se->opaque);
+ save_section_footer(f, se);
if (ret < 0) {
qemu_file_set_error(f, ret);
break;
@@ -676,7 +753,7 @@ int qemu_savevm_state_iterate(QEMUFile *f)
int ret = 1;
trace_savevm_state_iterate();
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_iterate) {
continue;
}
@@ -689,12 +766,12 @@ int qemu_savevm_state_iterate(QEMUFile *f)
return 0;
}
trace_savevm_section_start(se->idstr, se->section_id);
- /* Section type */
- qemu_put_byte(f, QEMU_VM_SECTION_PART);
- qemu_put_be32(f, se->section_id);
+
+ save_section_header(f, se, QEMU_VM_SECTION_PART);
ret = se->ops->save_live_iterate(f, se->opaque);
trace_savevm_section_end(se->idstr, se->section_id, ret);
+ save_section_footer(f, se);
if (ret < 0) {
qemu_file_set_error(f, ret);
@@ -727,7 +804,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
cpu_synchronize_all_states();
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_complete) {
continue;
}
@@ -737,12 +814,12 @@ void qemu_savevm_state_complete(QEMUFile *f)
}
}
trace_savevm_section_start(se->idstr, se->section_id);
- /* Section type */
- qemu_put_byte(f, QEMU_VM_SECTION_END);
- qemu_put_be32(f, se->section_id);
+
+ save_section_header(f, se, QEMU_VM_SECTION_END);
ret = se->ops->save_live_complete(f, se->opaque);
trace_savevm_section_end(se->idstr, se->section_id, ret);
+ save_section_footer(f, se);
if (ret < 0) {
qemu_file_set_error(f, ret);
return;
@@ -752,8 +829,7 @@ void qemu_savevm_state_complete(QEMUFile *f)
vmdesc = qjson_new();
json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
json_start_array(vmdesc, "devices");
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
- int len;
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
@@ -764,22 +840,13 @@ void qemu_savevm_state_complete(QEMUFile *f)
json_prop_str(vmdesc, "name", se->idstr);
json_prop_int(vmdesc, "instance_id", se->instance_id);
- /* Section type */
- qemu_put_byte(f, QEMU_VM_SECTION_FULL);
- qemu_put_be32(f, se->section_id);
-
- /* ID string */
- len = strlen(se->idstr);
- qemu_put_byte(f, len);
- qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
- qemu_put_be32(f, se->instance_id);
- qemu_put_be32(f, se->version_id);
+ save_section_header(f, se, QEMU_VM_SECTION_FULL);
vmstate_save(f, se, vmdesc);
json_end_object(vmdesc);
trace_savevm_section_end(se->idstr, se->section_id, 0);
+ save_section_footer(f, se);
}
qemu_put_byte(f, QEMU_VM_EOF);
@@ -803,7 +870,7 @@ uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
SaveStateEntry *se;
uint64_t ret = 0;
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_pending) {
continue;
}
@@ -822,7 +889,7 @@ void qemu_savevm_state_cancel(void)
SaveStateEntry *se;
trace_savevm_state_cancel();
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (se->ops && se->ops->cancel) {
se->ops->cancel(se->opaque);
}
@@ -842,6 +909,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
}
qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(f);
qemu_savevm_state_begin(f, &params);
qemu_mutex_lock_iothread();
@@ -872,9 +940,7 @@ static int qemu_save_device_state(QEMUFile *f)
cpu_synchronize_all_states();
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
- int len;
-
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (se->is_ram) {
continue;
}
@@ -882,19 +948,11 @@ static int qemu_save_device_state(QEMUFile *f)
continue;
}
- /* Section type */
- qemu_put_byte(f, QEMU_VM_SECTION_FULL);
- qemu_put_be32(f, se->section_id);
-
- /* ID string */
- len = strlen(se->idstr);
- qemu_put_byte(f, len);
- qemu_put_buffer(f, (uint8_t *)se->idstr, len);
-
- qemu_put_be32(f, se->instance_id);
- qemu_put_be32(f, se->version_id);
+ save_section_header(f, se, QEMU_VM_SECTION_FULL);
vmstate_save(f, se, NULL);
+
+ save_section_footer(f, se);
}
qemu_put_byte(f, QEMU_VM_EOF);
@@ -906,7 +964,7 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
{
SaveStateEntry *se;
- QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!strcmp(se->idstr, idstr) &&
(instance_id == se->instance_id ||
instance_id == se->alias_id))
@@ -922,18 +980,26 @@ static SaveStateEntry *find_se(const char *idstr, int instance_id)
return NULL;
}
-typedef struct LoadStateEntry {
+struct LoadStateEntry {
QLIST_ENTRY(LoadStateEntry) entry;
SaveStateEntry *se;
int section_id;
int version_id;
-} LoadStateEntry;
+};
-int qemu_loadvm_state(QEMUFile *f)
+void loadvm_free_handlers(MigrationIncomingState *mis)
{
- QLIST_HEAD(, LoadStateEntry) loadvm_handlers =
- QLIST_HEAD_INITIALIZER(loadvm_handlers);
LoadStateEntry *le, *new_le;
+
+ QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
+ QLIST_REMOVE(le, entry);
+ g_free(le);
+ }
+}
+
+int qemu_loadvm_state(QEMUFile *f)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
uint8_t section_type;
unsigned int v;
@@ -964,8 +1030,8 @@ int qemu_loadvm_state(QEMUFile *f)
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
uint32_t instance_id, version_id, section_id;
SaveStateEntry *se;
- char idstr[257];
- int len;
+ LoadStateEntry *le;
+ char idstr[256];
trace_qemu_loadvm_state_section(section_type);
switch (section_type) {
@@ -973,9 +1039,11 @@ int qemu_loadvm_state(QEMUFile *f)
case QEMU_VM_SECTION_FULL:
/* Read section start */
section_id = qemu_get_be32(f);
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)idstr, len);
- idstr[len] = 0;
+ if (!qemu_get_counted_string(f, idstr)) {
+ error_report("Unable to read ID string for section %u",
+ section_id);
+ return -EINVAL;
+ }
instance_id = qemu_get_be32(f);
version_id = qemu_get_be32(f);
@@ -1004,7 +1072,7 @@ int qemu_loadvm_state(QEMUFile *f)
le->se = se;
le->section_id = section_id;
le->version_id = version_id;
- QLIST_INSERT_HEAD(&loadvm_handlers, le, entry);
+ QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
ret = vmstate_load(f, le->se, le->version_id);
if (ret < 0) {
@@ -1012,13 +1080,17 @@ int qemu_loadvm_state(QEMUFile *f)
" device '%s'", instance_id, idstr);
goto out;
}
+ if (!check_section_footer(f, le->se)) {
+ ret = -EINVAL;
+ goto out;
+ }
break;
case QEMU_VM_SECTION_PART:
case QEMU_VM_SECTION_END:
section_id = qemu_get_be32(f);
trace_qemu_loadvm_state_section_partend(section_id);
- QLIST_FOREACH(le, &loadvm_handlers, entry) {
+ QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
if (le->section_id == section_id) {
break;
}
@@ -1035,6 +1107,10 @@ int qemu_loadvm_state(QEMUFile *f)
section_id, le->se->idstr);
goto out;
}
+ if (!check_section_footer(f, le->se)) {
+ ret = -EINVAL;
+ goto out;
+ }
break;
default:
error_report("Unknown savevm section type %d", section_type);
@@ -1066,11 +1142,6 @@ int qemu_loadvm_state(QEMUFile *f)
ret = 0;
out:
- QLIST_FOREACH_SAFE(le, &loadvm_handlers, entry, new_le) {
- QLIST_REMOVE(le, entry);
- g_free(le);
- }
-
if (ret == 0) {
/* We may not have a VMDESC section, so ignore relative errors */
ret = file_error_after_eof;
@@ -1314,9 +1385,11 @@ int load_vmstate(const char *name)
}
qemu_system_reset(VMRESET_SILENT);
+ migration_incoming_state_new(f);
ret = qemu_loadvm_state(f);
qemu_fclose(f);
+ migration_incoming_state_destroy();
if (ret < 0) {
error_report("Error %d while loading VM state", ret);
return ret;
diff --git a/migration/vmstate.c b/migration/vmstate.c
index e5388f0596..6138d1acb7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -341,11 +341,11 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
}
static const VMStateDescription *
- vmstate_get_subsection(const VMStateSubsection *sub, char *idstr)
+vmstate_get_subsection(const VMStateDescription **sub, char *idstr)
{
- while (sub && sub->needed) {
- if (strcmp(idstr, sub->vmsd->name) == 0) {
- return sub->vmsd;
+ while (sub && *sub && (*sub)->needed) {
+ if (strcmp(idstr, (*sub)->name) == 0) {
+ return *sub;
}
sub++;
}
@@ -358,7 +358,7 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
trace_vmstate_subsection_load(vmsd->name);
while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
- char idstr[256];
+ char idstr[256], *idstr_ret;
int ret;
uint8_t version_id, len, size;
const VMStateDescription *sub_vmsd;
@@ -369,11 +369,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
trace_vmstate_subsection_load_bad(vmsd->name, "(short)");
return 0;
}
- size = qemu_peek_buffer(f, (uint8_t *)idstr, len, 2);
+ size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2);
if (size != len) {
trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)");
return 0;
}
+ memcpy(idstr, idstr_ret, size);
idstr[size] = 0;
if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
@@ -405,12 +406,12 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, QJSON *vmdesc)
{
- const VMStateSubsection *sub = vmsd->subsections;
+ const VMStateDescription **sub = vmsd->subsections;
bool subsection_found = false;
- while (sub && sub->needed) {
- if (sub->needed(opaque)) {
- const VMStateDescription *vmsd = sub->vmsd;
+ while (sub && *sub && (*sub)->needed) {
+ if ((*sub)->needed(opaque)) {
+ const VMStateDescription *vmsd = *sub;
uint8_t len;
if (vmdesc) {
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 0c8b22f2aa..f6894bece9 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -474,6 +474,7 @@ class MigrationDump(object):
QEMU_VM_SECTION_FULL = 0x04
QEMU_VM_SUBSECTION = 0x05
QEMU_VM_VMDESCRIPTION = 0x06
+ QEMU_VM_SECTION_FOOTER= 0x7e
def __init__(self, filename):
self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ],
@@ -526,6 +527,10 @@ class MigrationDump(object):
elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
section_id = file.read32()
self.sections[section_id].read()
+ elif section_type == self.QEMU_VM_SECTION_FOOTER:
+ read_section_id = file.read32()
+ if read_section_id != section_id:
+ raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id))
else:
raise Exception("Unknown section type: %d" % section_type)
file.close()
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 9446e5a8ab..36365a57c7 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -40,6 +40,7 @@ static const VMStateDescription vmstate_vfp = {
.name = "cpu/vfp",
.version_id = 3,
.minimum_version_id = 3,
+ .needed = vfp_needed,
.fields = (VMStateField[]) {
VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64),
/* The xregs array is a little awkward because element 1 (FPSCR)
@@ -72,6 +73,7 @@ static const VMStateDescription vmstate_iwmmxt = {
.name = "cpu/iwmmxt",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = iwmmxt_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
@@ -91,6 +93,7 @@ static const VMStateDescription vmstate_m = {
.name = "cpu/m",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = m_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT32(env.v7m.other_sp, ARMCPU),
VMSTATE_UINT32(env.v7m.vecbase, ARMCPU),
@@ -114,6 +117,7 @@ static const VMStateDescription vmstate_thumb2ee = {
.name = "cpu/thumb2ee",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = thumb2ee_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT32(env.teecr, ARMCPU),
VMSTATE_UINT32(env.teehbr, ARMCPU),
@@ -282,21 +286,11 @@ const VMStateDescription vmstate_arm_cpu = {
VMSTATE_BOOL(powered_off, ARMCPU),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_vfp,
- .needed = vfp_needed,
- } , {
- .vmsd = &vmstate_iwmmxt,
- .needed = iwmmxt_needed,
- } , {
- .vmsd = &vmstate_m,
- .needed = m_needed,
- } , {
- .vmsd = &vmstate_thumb2ee,
- .needed = thumb2ee_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_vfp,
+ &vmstate_iwmmxt,
+ &vmstate_m,
+ &vmstate_thumb2ee,
+ NULL
}
};
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 69d86cb476..a0df64b577 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -403,6 +403,7 @@ static const VMStateDescription vmstate_steal_time_msr = {
.name = "cpu/steal_time_msr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = steal_time_msr_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.steal_time_msr, X86CPU),
VMSTATE_END_OF_LIST()
@@ -413,6 +414,7 @@ static const VMStateDescription vmstate_async_pf_msr = {
.name = "cpu/async_pf_msr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = async_pf_msr_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.async_pf_en_msr, X86CPU),
VMSTATE_END_OF_LIST()
@@ -423,6 +425,7 @@ static const VMStateDescription vmstate_pv_eoi_msr = {
.name = "cpu/async_pv_eoi_msr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = pv_eoi_msr_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.pv_eoi_en_msr, X86CPU),
VMSTATE_END_OF_LIST()
@@ -441,6 +444,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
.name = "cpu/fpop_ip_dp",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fpop_ip_dp_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT16(env.fpop, X86CPU),
VMSTATE_UINT64(env.fpip, X86CPU),
@@ -461,6 +465,7 @@ static const VMStateDescription vmstate_msr_tsc_adjust = {
.name = "cpu/msr_tsc_adjust",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = tsc_adjust_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.tsc_adjust, X86CPU),
VMSTATE_END_OF_LIST()
@@ -479,6 +484,7 @@ static const VMStateDescription vmstate_msr_tscdeadline = {
.name = "cpu/msr_tscdeadline",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = tscdeadline_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.tsc_deadline, X86CPU),
VMSTATE_END_OF_LIST()
@@ -505,6 +511,7 @@ static const VMStateDescription vmstate_msr_ia32_misc_enable = {
.name = "cpu/msr_ia32_misc_enable",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = misc_enable_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_ia32_misc_enable, X86CPU),
VMSTATE_END_OF_LIST()
@@ -515,6 +522,7 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = {
.name = "cpu/msr_ia32_feature_control",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = feature_control_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_ia32_feature_control, X86CPU),
VMSTATE_END_OF_LIST()
@@ -549,6 +557,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = {
.name = "cpu/msr_architectural_pmu",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = pmu_enable_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
VMSTATE_UINT64(env.msr_global_ctrl, X86CPU),
@@ -584,6 +593,7 @@ static const VMStateDescription vmstate_mpx = {
.name = "cpu/mpx",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = mpx_needed,
.fields = (VMStateField[]) {
VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4),
VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU),
@@ -605,6 +615,7 @@ static const VMStateDescription vmstate_msr_hypercall_hypercall = {
.name = "cpu/msr_hyperv_hypercall",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = hyperv_hypercall_enable_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_hv_guest_os_id, X86CPU),
VMSTATE_UINT64(env.msr_hv_hypercall, X86CPU),
@@ -624,6 +635,7 @@ static const VMStateDescription vmstate_msr_hyperv_vapic = {
.name = "cpu/msr_hyperv_vapic",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = hyperv_vapic_enable_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_hv_vapic, X86CPU),
VMSTATE_END_OF_LIST()
@@ -642,6 +654,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
.name = "cpu/msr_hyperv_time",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = hyperv_time_enable_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.msr_hv_tsc, X86CPU),
VMSTATE_END_OF_LIST()
@@ -683,6 +696,7 @@ static const VMStateDescription vmstate_avx512 = {
.name = "cpu/avx512",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = avx512_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0),
@@ -705,6 +719,7 @@ static const VMStateDescription vmstate_xss = {
.name = "cpu/xss",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = xss_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.xss, X86CPU),
VMSTATE_END_OF_LIST()
@@ -813,54 +828,22 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_END_OF_LIST()
/* The above list is not sorted /wrt version numbers, watch out! */
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_async_pf_msr,
- .needed = async_pf_msr_needed,
- } , {
- .vmsd = &vmstate_pv_eoi_msr,
- .needed = pv_eoi_msr_needed,
- } , {
- .vmsd = &vmstate_steal_time_msr,
- .needed = steal_time_msr_needed,
- } , {
- .vmsd = &vmstate_fpop_ip_dp,
- .needed = fpop_ip_dp_needed,
- }, {
- .vmsd = &vmstate_msr_tsc_adjust,
- .needed = tsc_adjust_needed,
- }, {
- .vmsd = &vmstate_msr_tscdeadline,
- .needed = tscdeadline_needed,
- }, {
- .vmsd = &vmstate_msr_ia32_misc_enable,
- .needed = misc_enable_needed,
- }, {
- .vmsd = &vmstate_msr_ia32_feature_control,
- .needed = feature_control_needed,
- }, {
- .vmsd = &vmstate_msr_architectural_pmu,
- .needed = pmu_enable_needed,
- } , {
- .vmsd = &vmstate_mpx,
- .needed = mpx_needed,
- }, {
- .vmsd = &vmstate_msr_hypercall_hypercall,
- .needed = hyperv_hypercall_enable_needed,
- }, {
- .vmsd = &vmstate_msr_hyperv_vapic,
- .needed = hyperv_vapic_enable_needed,
- }, {
- .vmsd = &vmstate_msr_hyperv_time,
- .needed = hyperv_time_enable_needed,
- }, {
- .vmsd = &vmstate_avx512,
- .needed = avx512_needed,
- }, {
- .vmsd = &vmstate_xss,
- .needed = xss_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_async_pf_msr,
+ &vmstate_pv_eoi_msr,
+ &vmstate_steal_time_msr,
+ &vmstate_fpop_ip_dp,
+ &vmstate_msr_tsc_adjust,
+ &vmstate_msr_tscdeadline,
+ &vmstate_msr_ia32_misc_enable,
+ &vmstate_msr_ia32_feature_control,
+ &vmstate_msr_architectural_pmu,
+ &vmstate_mpx,
+ &vmstate_msr_hypercall_hypercall,
+ &vmstate_msr_hyperv_vapic,
+ &vmstate_msr_hyperv_time,
+ &vmstate_avx512,
+ &vmstate_xss,
+ NULL
}
};
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index d875211a2d..f4ac7611dd 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -213,6 +213,7 @@ static const VMStateDescription vmstate_fpu = {
.name = "cpu/fpu",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fpu_needed,
.fields = (VMStateField[]) {
VMSTATE_FLOAT64_ARRAY(env.fpr, PowerPCCPU, 32),
VMSTATE_UINTTL(env.fpscr, PowerPCCPU),
@@ -231,6 +232,7 @@ static const VMStateDescription vmstate_altivec = {
.name = "cpu/altivec",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = altivec_needed,
.fields = (VMStateField[]) {
VMSTATE_AVR_ARRAY(env.avr, PowerPCCPU, 32),
VMSTATE_UINT32(env.vscr, PowerPCCPU),
@@ -249,6 +251,7 @@ static const VMStateDescription vmstate_vsx = {
.name = "cpu/vsx",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = vsx_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.vsr, PowerPCCPU, 32),
VMSTATE_END_OF_LIST()
@@ -269,6 +272,7 @@ static const VMStateDescription vmstate_tm = {
.version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
+ .needed = tm_needed,
.fields = (VMStateField []) {
VMSTATE_UINTTL_ARRAY(env.tm_gpr, PowerPCCPU, 32),
VMSTATE_AVR_ARRAY(env.tm_vsr, PowerPCCPU, 64),
@@ -302,6 +306,7 @@ static const VMStateDescription vmstate_sr = {
.name = "cpu/sr",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = sr_needed,
.fields = (VMStateField[]) {
VMSTATE_UINTTL_ARRAY(env.sr, PowerPCCPU, 32),
VMSTATE_END_OF_LIST()
@@ -351,6 +356,7 @@ static const VMStateDescription vmstate_slb = {
.name = "cpu/slb",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = slb_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32_EQUAL(env.slb_nr, PowerPCCPU),
VMSTATE_SLB_ARRAY(env.slb, PowerPCCPU, MAX_SLB_ENTRIES),
@@ -383,6 +389,7 @@ static const VMStateDescription vmstate_tlb6xx = {
.name = "cpu/tlb6xx",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = tlb6xx_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlb6, PowerPCCPU,
@@ -429,6 +436,7 @@ static const VMStateDescription vmstate_pbr403 = {
.name = "cpu/pbr403",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = pbr403_needed,
.fields = (VMStateField[]) {
VMSTATE_UINTTL_ARRAY(env.pb, PowerPCCPU, 4),
VMSTATE_END_OF_LIST()
@@ -439,6 +447,7 @@ static const VMStateDescription vmstate_tlbemb = {
.name = "cpu/tlb6xx",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = tlbemb_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbe, PowerPCCPU,
@@ -448,13 +457,9 @@ static const VMStateDescription vmstate_tlbemb = {
/* 403 protection registers */
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_pbr403,
- .needed = pbr403_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_pbr403,
+ NULL
}
};
@@ -483,6 +488,7 @@ static const VMStateDescription vmstate_tlbmas = {
.name = "cpu/tlbmas",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = tlbmas_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32_EQUAL(env.nb_tlb, PowerPCCPU),
VMSTATE_STRUCT_VARRAY_POINTER_INT32(env.tlb.tlbm, PowerPCCPU,
@@ -533,38 +539,18 @@ const VMStateDescription vmstate_ppc_cpu = {
VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection []) {
- {
- .vmsd = &vmstate_fpu,
- .needed = fpu_needed,
- } , {
- .vmsd = &vmstate_altivec,
- .needed = altivec_needed,
- } , {
- .vmsd = &vmstate_vsx,
- .needed = vsx_needed,
- } , {
- .vmsd = &vmstate_sr,
- .needed = sr_needed,
- } , {
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_fpu,
+ &vmstate_altivec,
+ &vmstate_vsx,
+ &vmstate_sr,
#ifdef TARGET_PPC64
- .vmsd = &vmstate_tm,
- .needed = tm_needed,
- } , {
- .vmsd = &vmstate_slb,
- .needed = slb_needed,
- } , {
+ &vmstate_tm,
+ &vmstate_slb,
#endif /* TARGET_PPC64 */
- .vmsd = &vmstate_tlb6xx,
- .needed = tlb6xx_needed,
- } , {
- .vmsd = &vmstate_tlbemb,
- .needed = tlbemb_needed,
- } , {
- .vmsd = &vmstate_tlbmas,
- .needed = tlbmas_needed,
- } , {
- /* empty */
- }
+ &vmstate_tlb6xx,
+ &vmstate_tlbemb,
+ &vmstate_tlbmas,
+ NULL
}
};
diff --git a/target-s390x/machine.c b/target-s390x/machine.c
index 004474959a..b76fb08319 100644
--- a/target-s390x/machine.c
+++ b/target-s390x/machine.c
@@ -42,10 +42,17 @@ static void cpu_pre_save(void *opaque)
}
}
+static inline bool fpu_needed(void *opaque)
+{
+ /* This looks odd, but we might want to NOT transfer fprs in the future */
+ return true;
+}
+
const VMStateDescription vmstate_fpu = {
.name = "cpu/fpu",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = fpu_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
@@ -68,16 +75,11 @@ const VMStateDescription vmstate_fpu = {
}
};
-static inline bool fpu_needed(void *opaque)
-{
- /* This looks odd, but we might want to NOT transfer fprs in the future */
- return true;
-}
-
const VMStateDescription vmstate_vregs = {
.name = "cpu/vregs",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = vregs_needed,
.fields = (VMStateField[]) {
/* vregs[0][0] -> vregs[15][0] and fregs are overlays */
VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
@@ -159,16 +161,10 @@ const VMStateDescription vmstate_s390_cpu = {
VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0,
irqstate_saved_size),
VMSTATE_END_OF_LIST()
- },
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_fpu,
- .needed = fpu_needed,
- } , {
- .vmsd = &vmstate_vregs,
- .needed = vregs_needed,
- } , {
- /* empty */
- }
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_fpu,
+ &vmstate_vregs,
+ NULL
},
};
diff --git a/trace-events b/trace-events
index 2662ffa850..1abca7a1e5 100644
--- a/trace-events
+++ b/trace-events
@@ -1179,13 +1179,14 @@ virtio_gpu_cmd_res_flush(uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint3
virtio_gpu_fence_ctrl(uint64_t fence, uint32_t type) "fence 0x%" PRIx64 ", type 0x%x"
virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64
-# savevm.c
+# migration/savevm.c
qemu_loadvm_state_section(unsigned int section_type) "%d"
qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
savevm_state_begin(void) ""
+savevm_state_header(void) ""
savevm_state_iterate(void) ""
savevm_state_complete(void) ""
savevm_state_cancel(void) ""
@@ -1205,7 +1206,7 @@ vmstate_subsection_load_good(const char *parent) "%s"
# qemu-file.c
qemu_file_fclose(void) ""
-# arch_init.c
+# migration/ram.c
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
migration_throttle(void) ""