aboutsummaryrefslogtreecommitdiff
path: root/migration
diff options
context:
space:
mode:
Diffstat (limited to 'migration')
-rw-r--r--migration/Makefile.objs1
-rw-r--r--migration/block-dirty-bitmap.c746
-rw-r--r--migration/block.c7
-rw-r--r--migration/migration.c30
-rw-r--r--migration/migration.h4
-rw-r--r--migration/qemu-file.c13
-rw-r--r--migration/qemu-file.h2
-rw-r--r--migration/ram.c9
-rw-r--r--migration/savevm.c33
-rw-r--r--migration/savevm.h5
-rw-r--r--migration/trace-events16
11 files changed, 843 insertions, 23 deletions
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 99e038024d..c83ec47ba8 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -6,6 +6,7 @@ common-obj-y += qemu-file.o global_state.o
common-obj-y += qemu-file-channel.o
common-obj-y += xbzrle.o postcopy-ram.o
common-obj-y += qjson.o
+common-obj-y += block-dirty-bitmap.o
common-obj-$(CONFIG_RDMA) += rdma.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 0000000000..dd04f102d8
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,746 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (c) 2016-2017 Virtuozzo International GmbH. All rights reserved.
+ *
+ * Authors:
+ * Liran Schour <lirans@il.ibm.com>
+ * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * ***
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only QMP-addressable
+ * bitmaps are migrated.
+ *
+ * Bitmap migration implies creating bitmap with the same name and granularity
+ * in destination QEMU. If the bitmap with the same name (for the same node)
+ * already exists on destination an error will be generated.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
+ * [ 1 byte: node name size ] \ flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be32: granularity
+ * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap)
+ * bit 0 - bitmap is enabled
+ * bit 1 - bitmap is persistent
+ * bit 2 - was "autoload" in older versions, now ignored
+ * bits 3-7 - reserved, must be zero
+ *
+ * # Completion of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer ] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "migration/register.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+#define CHUNK_SIZE (1 << 10) /* multiplied by 8 and by the bitmap granularity
+ * in init_dirty_bitmap_migration() to derive sectors_per_chunk, the number
+ * of sectors one bulk-phase chunk describes */
+
+/* Flags occupy one, two or four bytes (Big Endian). The size is determined as
+ * follows:
+ * in first (most significant) byte the top bit (0x80,
+ * DIRTY_BITMAP_MIG_EXTRA_FLAGS) is clear --> one byte
+ * in first byte the top bit is set --> two or four bytes, depending on second
+ * byte:
+ * | in second byte the top bit is clear --> two bytes
+ * | in second byte the top bit is set --> four bytes
+ */
+#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01 /* last chunk of the section */
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02 /* data chunk carries no buffer */
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04 /* header has a bitmap name */
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08 /* header has a node name */
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10 /* start-of-bitmap chunk */
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20 /* bitmap-complete chunk */
+#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40 /* data chunk */
+
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80 /* more flag bytes follow */
+
+#define DIRTY_BITMAP_MIG_START_FLAG_ENABLED 0x01
+#define DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT 0x02
+/* 0x04 was the "AUTOLOAD" flag in older versions, now it is ignored */
+#define DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK 0xf8
+
+typedef struct DirtyBitmapMigBitmapState {
+ /* Written during setup phase (see init_dirty_bitmap_migration()). */
+ BlockDriverState *bs; /* node owning the bitmap; referenced with bdrv_ref()
+ * at setup and released in dirty_bitmap_mig_cleanup() */
+ const char *node_name; /* name written into the chunk header for bs */
+ BdrvDirtyBitmap *bitmap; /* the bitmap being migrated */
+ uint64_t total_sectors; /* bdrv_nb_sectors() of bs, sampled at setup */
+ uint64_t sectors_per_chunk; /* sectors described by one bulk-phase chunk */
+ QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry; /* link in dbms_list */
+ uint8_t flags; /* DIRTY_BITMAP_MIG_START_FLAG_* sent with the START chunk */
+
+ /* For bulk phase. */
+ bool bulk_completed; /* set once cur_sector has reached total_sectors */
+ uint64_t cur_sector; /* first sector that has not been sent yet */
+} DirtyBitmapMigBitmapState;
+
+typedef struct DirtyBitmapMigState {
+ QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list; /* one entry
+ * per bitmap selected for migration */
+
+ bool bulk_completed; /* set by bulk_phase() once every bitmap was sent */
+ bool no_bitmaps; /* set at setup when dbms_list ended up empty */
+
+ /* for send_bitmap_header(): node and bitmap named by the previous header,
+ * so that names which did not change are not sent again */
+ BlockDriverState *prev_bs;
+ BdrvDirtyBitmap *prev_bitmap;
+} DirtyBitmapMigState;
+
+typedef struct DirtyBitmapLoadState {
+ uint32_t flags; /* flags of the most recently read chunk header */
+ char node_name[256]; /* last node name received */
+ char bitmap_name[256]; /* last bitmap name received */
+ BlockDriverState *bs; /* node looked up from node_name */
+ BdrvDirtyBitmap *bitmap; /* bitmap found or created for bitmap_name */
+} DirtyBitmapLoadState;
+
+static DirtyBitmapMigState dirty_bitmap_mig_state; /* state of the saving side */
+
+typedef struct DirtyBitmapLoadBitmapState {
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ bool migrated; /* set by dirty_bitmap_load_complete() for this bitmap */
+} DirtyBitmapLoadBitmapState;
+static GSList *enabled_bitmaps; /* DirtyBitmapLoadBitmapState entries added by
+ * dirty_bitmap_load_start() for bitmaps whose START chunk had the ENABLED
+ * flag; emptied by dirty_bitmap_mig_before_vm_start() */
+QemuMutex finish_lock; /* taken by dirty_bitmap_mig_before_vm_start() and
+ * dirty_bitmap_load_complete().
+ * NOTE(review): not static and not declared in any header visible here -
+ * confirm whether external linkage is intended. */
+
+/*
+ * Prepare the incoming side: initialise finish_lock, the mutex taken by
+ * dirty_bitmap_mig_before_vm_start() and dirty_bitmap_load_complete().
+ */
+void init_dirty_bitmap_incoming_migration(void)
+{
+    qemu_mutex_init(&finish_lock);
+}
+
+/*
+ * Read the variable-length chunk flags from @f.
+ *
+ * One byte is read first; while DIRTY_BITMAP_MIG_EXTRA_FLAGS is set in the
+ * value read so far the encoding is extended to two and then four bytes.
+ *
+ * Returns the flags as a 32-bit value.
+ */
+static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
+{
+    /*
+     * Must be as wide as the return type: with a uint8_t accumulator the
+     * shifts below would throw away everything but the last byte read.
+     */
+    uint32_t flags = qemu_get_byte(f);
+
+    if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+        flags = flags << 8 | qemu_get_byte(f);
+        if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
+            flags = flags << 16 | qemu_get_be16(f);
+        }
+    }
+
+    return flags;
+}
+
+/*
+ * Write chunk flags to @f. Only the one-byte encoding is produced, so any
+ * bit above the low seven (including the extension marker) is rejected.
+ */
+static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
+{
+    const uint32_t unsupported = 0xffffff00 | DIRTY_BITMAP_MIG_EXTRA_FLAGS;
+
+    /* The sender never needs more than one byte of flags. */
+    assert(!(flags & unsupported));
+
+    qemu_put_byte(f, flags);
+}
+
+/*
+ * Emit the header shared by all chunk types: the flags, followed by the node
+ * name and/or bitmap name when they differ from those of the previous header.
+ *
+ * @additional_flags: chunk-type flags to combine with the name flags.
+ */
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+                               uint32_t additional_flags)
+{
+    DirtyBitmapMigState *state = &dirty_bitmap_mig_state;
+    uint32_t flags = additional_flags;
+
+    trace_send_bitmap_header_enter();
+
+    /* A name is only transmitted when it changed since the last header. */
+    if (state->prev_bs != dbms->bs) {
+        flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+        state->prev_bs = dbms->bs;
+    }
+    if (state->prev_bitmap != dbms->bitmap) {
+        flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+        state->prev_bitmap = dbms->bitmap;
+    }
+
+    qemu_put_bitmap_flags(f, flags);
+
+    if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+        qemu_put_counted_string(f, dbms->node_name);
+    }
+    if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+        qemu_put_counted_string(f, bdrv_dirty_bitmap_name(dbms->bitmap));
+    }
+}
+
+/*
+ * Announce a bitmap to the destination: header with the START flag, then the
+ * granularity as a big-endian 32-bit value and one byte of
+ * DIRTY_BITMAP_MIG_START_FLAG_* bits.
+ */
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+
+    qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+    qemu_put_byte(f, dbms->flags);
+}
+
+/* Tell the destination that @dbms is fully sent: a header with COMPLETE. */
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+/*
+ * Send one data chunk of @dbms covering @nr_sectors sectors starting at
+ * @start_sector.
+ *
+ * The serialized bits are sent as a be64 size plus buffer, unless they are
+ * all zero, in which case only the ZEROES flag is set and no buffer follows.
+ */
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+                             uint64_t start_sector, uint32_t nr_sectors)
+{
+    const uint64_t offset = start_sector << BDRV_SECTOR_BITS;
+    const uint64_t bytes = (uint64_t)nr_sectors << BDRV_SECTOR_BITS;
+    /* align for buffer_is_zero() */
+    const uint64_t align = 4 * sizeof(long);
+    uint64_t raw_size =
+        bdrv_dirty_bitmap_serialization_size(dbms->bitmap, offset, bytes);
+    uint64_t buf_size = QEMU_ALIGN_UP(raw_size, align);
+    uint8_t *buf = g_malloc0(buf_size);
+    uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+    bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf, offset, bytes);
+
+    if (buffer_is_zero(buf, buf_size)) {
+        flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+    }
+
+    trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+    send_bitmap_header(f, dbms, flags);
+
+    qemu_put_be64(f, start_sector);
+    qemu_put_be32(f, nr_sectors);
+
+    /* if a block is zero we need to flush here since the network
+     * bandwidth is now a lot higher than the storage device bandwidth.
+     * thus if we queue zero blocks we slow down the migration. */
+    if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+        qemu_fflush(f);
+    } else {
+        qemu_put_be64(f, buf_size);
+        qemu_put_buffer(f, buf, buf_size);
+    }
+
+    g_free(buf);
+}
+
+/*
+ * Drop all per-bitmap save state: every queued entry is unlinked, its bitmap
+ * is QMP-unlocked, the node reference is released and the entry is freed.
+ *
+ * Called with iothread lock taken.
+ */
+static void dirty_bitmap_mig_cleanup(void)
+{
+    while (!QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) {
+        DirtyBitmapMigBitmapState *dbms =
+            QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list);
+
+        QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+        bdrv_dirty_bitmap_set_qmp_locked(dbms->bitmap, false);
+        bdrv_unref(dbms->bs);
+        g_free(dbms);
+    }
+}
+
+/*
+ * Build dirty_bitmap_mig_state.dbms_list: one entry per named dirty bitmap
+ * found on any block node. Each selected bitmap is QMP-locked and its node
+ * referenced; both are undone by dirty_bitmap_mig_cleanup().
+ *
+ * Returns 0 on success, -1 (after cleaning up) when a bitmap cannot be
+ * migrated because its node is unnamed or the bitmap is frozen or locked.
+ *
+ * Called with iothread lock taken.
+ */
+static int init_dirty_bitmap_migration(void)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+    DirtyBitmapMigBitmapState *dbms;
+    BdrvNextIterator it;
+
+    dirty_bitmap_mig_state.bulk_completed = false;
+    dirty_bitmap_mig_state.prev_bs = NULL;
+    dirty_bitmap_mig_state.prev_bitmap = NULL;
+    dirty_bitmap_mig_state.no_bitmaps = false;
+
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        /* the name is taken before implicit nodes are skipped */
+        const char *drive_name = bdrv_get_device_or_node_name(bs);
+
+        /* skip automatically inserted nodes */
+        while (bs && bs->drv && bs->implicit) {
+            bs = backing_bs(bs);
+        }
+
+        if (!bs) {
+            /* the implicit chain ended without reaching an explicit node */
+            continue;
+        }
+
+        for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap;
+             bitmap = bdrv_dirty_bitmap_next(bs, bitmap))
+        {
+            /* only named (QMP-addressable) bitmaps are migrated */
+            if (!bdrv_dirty_bitmap_name(bitmap)) {
+                continue;
+            }
+
+            if (drive_name == NULL) {
+                error_report("Found bitmap '%s' in unnamed node %p. It can't "
+                             "be migrated", bdrv_dirty_bitmap_name(bitmap), bs);
+                goto fail;
+            }
+
+            if (bdrv_dirty_bitmap_frozen(bitmap)) {
+                error_report("Can't migrate frozen dirty bitmap: '%s'",
+                             bdrv_dirty_bitmap_name(bitmap));
+                goto fail;
+            }
+
+            if (bdrv_dirty_bitmap_qmp_locked(bitmap)) {
+                error_report("Can't migrate locked dirty bitmap: '%s'",
+                             bdrv_dirty_bitmap_name(bitmap));
+                goto fail;
+            }
+
+            bdrv_ref(bs);
+            bdrv_dirty_bitmap_set_qmp_locked(bitmap, true);
+
+            dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+            dbms->bs = bs;
+            dbms->node_name = drive_name;
+            dbms->bitmap = bitmap;
+            dbms->total_sectors = bdrv_nb_sectors(bs);
+            /*
+             * Multiply in 64 bits: with a large granularity the product
+             * does not fit in 32 bits and would wrap, possibly to zero.
+             */
+            dbms->sectors_per_chunk =
+                ((uint64_t)CHUNK_SIZE * 8 *
+                 bdrv_dirty_bitmap_granularity(bitmap)) >> BDRV_SECTOR_BITS;
+            if (bdrv_dirty_bitmap_enabled(bitmap)) {
+                dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
+            }
+            if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+                dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT;
+            }
+
+            QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+                                 dbms, entry);
+        }
+    }
+
+    /* unset persistance here, to not roll back it */
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        bdrv_dirty_bitmap_set_persistance(dbms->bitmap, false);
+    }
+
+    if (QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) {
+        dirty_bitmap_mig_state.no_bitmaps = true;
+    }
+
+    return 0;
+
+fail:
+    dirty_bitmap_mig_cleanup();
+
+    return -1;
+}
+
+/*
+ * Send the next bulk chunk of @dbms (at most sectors_per_chunk sectors) and
+ * advance cur_sector; marks the bitmap as bulk-completed at the end.
+ *
+ * Called with no lock taken.
+ */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+    uint64_t remaining = dbms->total_sectors - dbms->cur_sector;
+    uint32_t nr_sectors = MIN(remaining, dbms->sectors_per_chunk);
+
+    send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+    dbms->cur_sector += nr_sectors;
+
+    if (dbms->cur_sector >= dbms->total_sectors) {
+        dbms->bulk_completed = true;
+    }
+}
+
+/*
+ * Send bulk chunks for every bitmap that is not finished yet. With @limit
+ * set, return as soon as the file's rate limit is hit; the global
+ * bulk_completed flag is only set when all bitmaps were sent in full.
+ *
+ * Called with no lock taken.
+ */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+    DirtyBitmapMigBitmapState *dbms;
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        if (dbms->bulk_completed) {
+            continue;
+        }
+
+        do {
+            bulk_phase_send_chunk(f, dbms);
+            if (limit && qemu_file_rate_limit(f)) {
+                /* out of budget: resume from cur_sector on the next call */
+                return;
+            }
+        } while (!dbms->bulk_completed);
+    }
+
+    dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* SaveVMHandlers.save_cleanup: release the per-bitmap save state. */
+static void dirty_bitmap_save_cleanup(void *opaque)
+{
+    dirty_bitmap_mig_cleanup();
+}
+
+/*
+ * SaveVMHandlers.save_live_iterate: bitmap data is only sent once the
+ * migration is in postcopy; every call ends its section with an EOS chunk.
+ *
+ * Returns non-zero once the bulk phase has completed.
+ */
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+    trace_dirty_bitmap_save_iterate(migration_in_postcopy());
+
+    if (migration_in_postcopy()) {
+        if (!dirty_bitmap_mig_state.bulk_completed) {
+            bulk_phase(f, true);
+        }
+    }
+
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/*
+ * Completion handler (registered for both precopy and postcopy): send
+ * whatever the bulk phase has not sent yet without rate limiting, then a
+ * COMPLETE chunk per bitmap and a final EOS, and release the save state.
+ *
+ * Called with iothread lock taken.
+ */
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+    DirtyBitmapMigBitmapState *dbms;
+
+    trace_dirty_bitmap_save_complete_enter();
+
+    if (!dirty_bitmap_mig_state.bulk_completed) {
+        bulk_phase(f, false);
+    }
+
+    for (dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list);
+         dbms != NULL;
+         dbms = QSIMPLEQ_NEXT(dbms, entry)) {
+        send_bitmap_complete(f, dbms);
+    }
+
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    trace_dirty_bitmap_save_complete_finish();
+
+    dirty_bitmap_mig_cleanup();
+
+    return 0;
+}
+
+/*
+ * SaveVMHandlers.save_live_pending: estimate the remaining bitmap data as the
+ * number of granularity-sized units still to be covered by the bulk phase.
+ * The estimate is added to @res_postcopy_only only.
+ */
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+                                      uint64_t max_size,
+                                      uint64_t *res_precopy_only,
+                                      uint64_t *res_compatible,
+                                      uint64_t *res_postcopy_only)
+{
+    DirtyBitmapMigBitmapState *dbms;
+    uint64_t pending = 0;
+
+    qemu_mutex_lock_iothread();
+
+    QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+        uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+        uint64_t sectors = 0;
+
+        if (!dbms->bulk_completed) {
+            sectors = dbms->total_sectors - dbms->cur_sector;
+        }
+
+        pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran);
+    }
+
+    qemu_mutex_unlock_iothread();
+
+    trace_dirty_bitmap_save_pending(pending, max_size);
+
+    *res_postcopy_only += pending;
+}
+
+/*
+ * Handle a START chunk: the first occurrence of a bitmap in the stream.
+ *
+ * Reads the granularity (be32) and the start flags (one byte), creates the
+ * bitmap on s->bs in disabled state and, when the source had it enabled,
+ * creates a successor and queues the bitmap on enabled_bitmaps.
+ *
+ * Returns 0 on success, -EINVAL when the flags contain reserved bits, the
+ * bitmap already exists on the destination, or creation fails.
+ */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    Error *local_err = NULL;
+    uint32_t granularity = qemu_get_be32(f);
+    uint8_t flags = qemu_get_byte(f);
+
+    /*
+     * Validate the chunk before modifying the node, so that a malformed
+     * stream does not leave a freshly created bitmap behind.
+     */
+    if (flags & DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK) {
+        error_report("Unknown flags in migrated dirty bitmap header: %x",
+                     flags);
+        return -EINVAL;
+    }
+
+    if (s->bitmap) {
+        error_report("Bitmap with the same name ('%s') already exists on "
+                     "destination", bdrv_dirty_bitmap_name(s->bitmap));
+        return -EINVAL;
+    }
+
+    s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+                                         s->bitmap_name, &local_err);
+    if (!s->bitmap) {
+        error_report_err(local_err);
+        return -EINVAL;
+    }
+
+    if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) {
+        bdrv_dirty_bitmap_set_persistance(s->bitmap, true);
+    }
+
+    /* the bitmap stays disabled while its contents are being loaded */
+    bdrv_disable_dirty_bitmap(s->bitmap);
+    if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
+        DirtyBitmapLoadBitmapState *b;
+
+        bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -EINVAL;
+        }
+
+        /* remembered so it can be enabled before the VM starts */
+        b = g_new(DirtyBitmapLoadBitmapState, 1);
+        b->bs = s->bs;
+        b->bitmap = s->bitmap;
+        b->migrated = false;
+        enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+    }
+
+    return 0;
+}
+
+/*
+ * Enable the bitmaps queued on enabled_bitmaps: a bitmap whose migration has
+ * already completed is enabled directly, otherwise its successor is enabled.
+ * The list and its entries are freed, all under finish_lock.
+ */
+void dirty_bitmap_mig_before_vm_start(void)
+{
+    GSList *item;
+
+    qemu_mutex_lock(&finish_lock);
+
+    item = enabled_bitmaps;
+    while (item) {
+        DirtyBitmapLoadBitmapState *b = item->data;
+
+        if (!b->migrated) {
+            bdrv_dirty_bitmap_enable_successor(b->bitmap);
+        } else {
+            bdrv_enable_dirty_bitmap(b->bitmap);
+        }
+
+        g_free(b);
+        item = g_slist_next(item);
+    }
+
+    g_slist_free(enabled_bitmaps);
+    enabled_bitmaps = NULL;
+
+    qemu_mutex_unlock(&finish_lock);
+}
+
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{ /* Handle a COMPLETE chunk for s->bitmap: finish deserialization, mark the
+ * bitmap as migrated in enabled_bitmaps and, if it is frozen, reclaim it. */
+ GSList *item;
+ trace_dirty_bitmap_load_complete();
+ bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->bitmap == s->bitmap) {
+ b->migrated = true; /* read by dirty_bitmap_mig_before_vm_start() */
+ break;
+ }
+ }
+
+ if (bdrv_dirty_bitmap_frozen(s->bitmap)) { /* presumably frozen by the
+ * successor created in dirty_bitmap_load_start() - TODO confirm */
+ bdrv_dirty_bitmap_lock(s->bitmap);
+ if (enabled_bitmaps == NULL) {
+ /* in postcopy: the list was already emptied by
+ * dirty_bitmap_mig_before_vm_start() */
+ bdrv_reclaim_dirty_bitmap_locked(s->bs, s->bitmap, &error_abort);
+ bdrv_enable_dirty_bitmap(s->bitmap);
+ } else {
+ /* target not started, successor must be empty */
+ int64_t count = bdrv_get_dirty_count(s->bitmap);
+ BdrvDirtyBitmap *ret = bdrv_reclaim_dirty_bitmap_locked(s->bs,
+ s->bitmap,
+ NULL);
+ /* bdrv_reclaim_dirty_bitmap can fail only on no successor (it
+ * must be) or on merge fail, but merge can't fail when second
+ * bitmap is empty
+ */
+ assert(ret == s->bitmap &&
+ count == bdrv_get_dirty_count(s->bitmap));
+ }
+ bdrv_dirty_bitmap_unlock(s->bitmap);
+ }
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+/*
+ * Handle a data chunk: read the start sector (be64) and sector count (be32),
+ * then either mark the range as zeroes (ZEROES flag) or read the serialized
+ * buffer and deserialize it into s->bitmap.
+ *
+ * Returns 0 on success, -EINVAL when the announced buffer size does not match
+ * what the destination bitmap needs for the range, -EIO on a short read.
+ */
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    uint64_t first_byte = qemu_get_be64(f) << BDRV_SECTOR_BITS;
+    uint64_t nr_bytes = (uint64_t)qemu_get_be32(f) << BDRV_SECTOR_BITS;
+    trace_dirty_bitmap_load_bits_enter(first_byte >> BDRV_SECTOR_BITS,
+                                       nr_bytes >> BDRV_SECTOR_BITS);
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+        trace_dirty_bitmap_load_bits_zeroes();
+        bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes,
+                                             false);
+    } else {
+        size_t ret;
+        uint8_t *buf;
+        uint64_t buf_size = qemu_get_be64(f);
+        uint64_t needed_size =
+            bdrv_dirty_bitmap_serialization_size(s->bitmap,
+                                                 first_byte, nr_bytes);
+
+        /* the sender pads the buffer up to the same alignment as used in
+         * send_bitmap_bits, so anything outside that window is a mismatch */
+        if (needed_size > buf_size ||
+            buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long))) {
+            error_report("Migrated bitmap granularity doesn't "
+                         "match the destination bitmap '%s' granularity",
+                         bdrv_dirty_bitmap_name(s->bitmap));
+            return -EINVAL;
+        }
+
+        buf = g_malloc(buf_size);
+        ret = qemu_get_buffer(f, buf, buf_size);
+        if (ret != buf_size) {
+            error_report("Failed to read bitmap bits");
+            /* do not leak the receive buffer on a short read */
+            g_free(buf);
+            return -EIO;
+        }
+
+        bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes,
+                                           false);
+        g_free(buf);
+    }
+
+    return 0;
+}
+
+/*
+ * Read a chunk header into @s: the flags, then the node name and/or bitmap
+ * name when the corresponding flags are set, resolving them to s->bs and
+ * s->bitmap. Names that are not present are inherited from the previous
+ * chunk.
+ *
+ * Returns 0 on success, -EINVAL when a name cannot be read or resolved, or
+ * when a chunk other than a bare EOS arrives before a node or bitmap is set.
+ */
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+    Error *local_err = NULL;
+    bool nothing;
+    s->flags = qemu_get_bitmap_flags(f);
+    trace_dirty_bitmap_load_header(s->flags);
+
+    /* true when no flag other than EOS is set: such a chunk needs no names */
+    nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+        if (!qemu_get_counted_string(f, s->node_name)) {
+            error_report("Unable to read node name string");
+            return -EINVAL;
+        }
+        s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+        if (!s->bs) {
+            error_report_err(local_err);
+            return -EINVAL;
+        }
+    } else if (!s->bs && !nothing) {
+        error_report("Error: block device name is not set");
+        return -EINVAL;
+    }
+
+    if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+        if (!qemu_get_counted_string(f, s->bitmap_name)) {
+            error_report("Unable to read bitmap name string");
+            return -EINVAL;
+        }
+        s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+        /* bitmap may be NULL here, it wouldn't be an error if it is the
+         * first occurrence of the bitmap */
+        if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+            error_report("Error: unknown dirty bitmap "
+                         "'%s' for block device '%s'",
+                         s->bitmap_name, s->node_name);
+            return -EINVAL;
+        }
+    } else if (!s->bitmap && !nothing) {
+        /* this branch is about the bitmap, not the block device */
+        error_report("Error: bitmap name is not set");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * SaveVMHandlers.load_state: process chunks until one carries the EOS flag.
+ *
+ * The load state is static, so the node and bitmap selected by earlier chunks
+ * remain in effect across calls.
+ *
+ * Returns 0 on success or a negative errno value from the header or chunk
+ * handlers or from the file.
+ */
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+    static DirtyBitmapLoadState s;
+    int ret = 0;
+
+    trace_dirty_bitmap_load_enter();
+
+    if (version_id != 1) {
+        return -EINVAL;
+    }
+
+    do {
+        ret = dirty_bitmap_load_header(f, &s);
+        if (ret) {
+            /*
+             * Stop right away: the handlers below would otherwise overwrite
+             * the error or run on a node or bitmap that was never resolved.
+             */
+            return ret;
+        }
+
+        if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+            ret = dirty_bitmap_load_start(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+            dirty_bitmap_load_complete(f, &s);
+        } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+            ret = dirty_bitmap_load_bits(f, &s);
+        }
+
+        if (!ret) {
+            ret = qemu_file_get_error(f);
+        }
+
+        if (ret) {
+            return ret;
+        }
+    } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+    trace_dirty_bitmap_load_success();
+    return 0;
+}
+
+/*
+ * SaveVMHandlers.save_setup: collect the bitmaps to migrate and send a START
+ * chunk for each of them, followed by EOS.
+ *
+ * Returns 0 on success, -1 when the bitmap list cannot be built.
+ */
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+    DirtyBitmapMigBitmapState *dbms = NULL;
+
+    if (init_dirty_bitmap_migration() < 0) {
+        return -1;
+    }
+
+    for (dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list);
+         dbms != NULL;
+         dbms = QSIMPLEQ_NEXT(dbms, entry)) {
+        send_bitmap_start(f, dbms);
+    }
+
+    qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+    return 0;
+}
+
+/*
+ * SaveVMHandlers.is_active: the section is active when the dirty-bitmaps
+ * capability is on and setup did not find the bitmap list empty.
+ */
+static bool dirty_bitmap_is_active(void *opaque)
+{
+    if (!migrate_dirty_bitmaps()) {
+        return false;
+    }
+
+    return !dirty_bitmap_mig_state.no_bitmaps;
+}
+
+/*
+ * SaveVMHandlers.is_active_iterate: iterate only while the section is active
+ * and the VM is not running.
+ */
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+    if (!dirty_bitmap_is_active(opaque)) {
+        return false;
+    }
+
+    return !runstate_is_running();
+}
+
+/* SaveVMHandlers.has_postcopy: always reports postcopy support. */
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+    return true;
+}
+
+/* Live-migration callbacks registered for the "dirty-bitmap" section. */
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+    /* activity predicates */
+    .is_active = dirty_bitmap_is_active,
+    .is_active_iterate = dirty_bitmap_is_active_iterate,
+    .has_postcopy = dirty_bitmap_has_postcopy,
+
+    /* saving side; the same handler completes precopy and postcopy */
+    .save_setup = dirty_bitmap_save_setup,
+    .save_live_pending = dirty_bitmap_save_pending,
+    .save_live_iterate = dirty_bitmap_save_iterate,
+    .save_live_complete_precopy = dirty_bitmap_save_complete,
+    .save_live_complete_postcopy = dirty_bitmap_save_complete,
+    .save_cleanup = dirty_bitmap_save_cleanup,
+
+    /* loading side */
+    .load_state = dirty_bitmap_load,
+};
+
+/*
+ * Initialise the (empty) list of bitmaps to save and register the
+ * "dirty-bitmap" live section, version 1, with the migration core.
+ */
+void dirty_bitmap_mig_init(void)
+{
+    DirtyBitmapMigState *state = &dirty_bitmap_mig_state;
+
+    QSIMPLEQ_INIT(&state->dbms_list);
+
+    register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+                         &savevm_dirty_bitmap_handlers, state);
+}
diff --git a/migration/block.c b/migration/block.c
index 41b95d1dd8..5c03632257 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -864,8 +864,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
}
static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
@@ -886,7 +887,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
/* We don't do postcopy */
- *non_postcopiable_pending += pending;
+ *res_precopy_only += pending;
}
static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c b/migration/migration.c
index 1f22f463d3..fc629e5965 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -159,6 +159,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
sizeof(struct PostCopyFD));
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
+
+ init_dirty_bitmap_incoming_migration();
+
once = true;
}
return &mis_current;
@@ -326,6 +329,8 @@ static void process_incoming_migration_bh(void *opaque)
state, we need to obey autostart. Any other state is set with
runstate_set. */
+ dirty_bitmap_mig_before_vm_start();
+
if (!global_state_received() ||
global_state_get_runstate() == RUN_STATE_RUNNING) {
if (autostart) {
@@ -1028,7 +1033,7 @@ void qmp_migrate_start_postcopy(Error **errp)
{
MigrationState *s = migrate_get_current();
- if (!migrate_postcopy_ram()) {
+ if (!migrate_postcopy()) {
error_setg(errp, "Enable postcopy with migrate_set_capability before"
" the start of migration");
return;
@@ -1514,7 +1519,7 @@ bool migrate_postcopy_ram(void)
bool migrate_postcopy(void)
{
- return migrate_postcopy_ram();
+ return migrate_postcopy_ram() || migrate_dirty_bitmaps();
}
bool migrate_auto_converge(void)
@@ -1571,6 +1576,15 @@ int migrate_decompress_threads(void)
return s->parameters.decompress_threads;
}
+/* Report whether the dirty-bitmaps migration capability is enabled. */
+bool migrate_dirty_bitmaps(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+}
+
+
bool migrate_use_events(void)
{
MigrationState *s;
@@ -2248,20 +2262,20 @@ typedef enum {
*/
static MigIterateState migration_iteration_run(MigrationState *s)
{
- uint64_t pending_size, pend_post, pend_nonpost;
+ uint64_t pending_size, pend_pre, pend_compat, pend_post;
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
- qemu_savevm_state_pending(s->to_dst_file, s->threshold_size,
- &pend_nonpost, &pend_post);
- pending_size = pend_nonpost + pend_post;
+ qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
+ &pend_compat, &pend_post);
+ pending_size = pend_pre + pend_compat + pend_post;
trace_migrate_pending(pending_size, s->threshold_size,
- pend_post, pend_nonpost);
+ pend_pre, pend_compat, pend_post);
if (pending_size && pending_size >= s->threshold_size) {
/* Still a significant amount to transfer */
if (migrate_postcopy() && !in_postcopy &&
- pend_nonpost <= s->threshold_size &&
+ pend_pre <= s->threshold_size &&
atomic_read(&s->start_postcopy)) {
if (postcopy_start(s)) {
error_report("%s: postcopy failed to start", __func__);
diff --git a/migration/migration.h b/migration/migration.h
index 83dc36b57a..8d2f320c48 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -209,6 +209,7 @@ bool migrate_postcopy(void);
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
+bool migrate_dirty_bitmaps(void);
bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
@@ -238,4 +239,7 @@ void migrate_send_rp_pong(MigrationIncomingState *mis,
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
ram_addr_t start, size_t len);
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
#endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2ab2bf362d..e85f501f86 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -734,6 +734,19 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
}
/*
+ * Put a string with one preceding byte containing its length. The length of
+ * the string should be less than 256.
+ */
+void qemu_put_counted_string(QEMUFile *f, const char *str)
+{
+    const size_t len = strlen(str);
+
+    /* the length has to fit into the single byte that precedes the data */
+    assert(len < 256);
+
+    qemu_put_byte(f, len);
+    /* the terminating NUL is not sent */
+    qemu_put_buffer(f, (const uint8_t *)str, len);
+}
+
+
+/*
* Set the blocking state of the QEMUFile.
* Note: On some transports the OS only keeps a single blocking state for
* both directions, and thus changing the blocking on the main
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index aae4e5ed36..f4f356ab12 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -174,4 +174,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
ram_addr_t offset, size_t size,
uint64_t *bytes_sent);
+void qemu_put_counted_string(QEMUFile *f, const char *name);
+
#endif
diff --git a/migration/ram.c b/migration/ram.c
index 6ce7770b8c..0e90efa092 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2375,8 +2375,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
RAMState **temp = opaque;
RAMState *rs = *temp;
@@ -2396,9 +2397,9 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
- *postcopiable_pending += remaining_size;
+ *res_compatible += remaining_size;
} else {
- *non_postcopiable_pending += remaining_size;
+ *res_precopy_only += remaining_size;
}
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 305c3ceaf5..e2be02afe4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -54,6 +54,7 @@
#include "qemu/cutils.h"
#include "io/channel-buffer.h"
#include "io/channel-file.h"
+#include "sysemu/replay.h"
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
@@ -1028,6 +1029,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
continue;
}
}
+ if (se->ops && se->ops->is_active_iterate) {
+ if (!se->ops->is_active_iterate(se->opaque)) {
+ continue;
+ }
+ }
/*
* In the postcopy phase, any device that doesn't know how to
* do postcopy should have saved it's state in the _complete
@@ -1220,13 +1226,15 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
* for units that can't do postcopy.
*/
void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
- uint64_t *res_non_postcopiable,
- uint64_t *res_postcopiable)
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
SaveStateEntry *se;
- *res_non_postcopiable = 0;
- *res_postcopiable = 0;
+ *res_precopy_only = 0;
+ *res_compatible = 0;
+ *res_postcopy_only = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1239,7 +1247,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
}
}
se->ops->save_live_pending(f, se->opaque, threshold_size,
- res_non_postcopiable, res_postcopiable);
+ res_precopy_only, res_compatible,
+ res_postcopy_only);
}
}
@@ -1698,6 +1707,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
@@ -2210,6 +2221,12 @@ int save_snapshot(const char *name, Error **errp)
struct tm tm;
AioContext *aio_context;
+ if (!replay_can_snapshot()) {
+ error_report("Record/replay does not allow making snapshot "
+ "right now. Try once more later.");
+ return ret;
+ }
+
if (!bdrv_all_can_snapshot(&bs)) {
error_setg(errp, "Device '%s' is writable but does not support "
"snapshots", bdrv_get_device_name(bs));
@@ -2401,6 +2418,12 @@ int load_snapshot(const char *name, Error **errp)
AioContext *aio_context;
MigrationIncomingState *mis = migration_incoming_get_current();
+ if (!replay_can_snapshot()) {
+ error_report("Record/replay does not allow loading snapshot "
+ "right now. Try once more later.");
+ return -EINVAL;
+ }
+
if (!bdrv_all_can_snapshot(&bs)) {
error_setg(errp,
"Device '%s' is writable but does not support snapshots",
diff --git a/migration/savevm.h b/migration/savevm.h
index 295c4a1f2c..cf4f0d37ca 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
- uint64_t *res_non_postcopiable,
- uint64_t *res_postcopiable);
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
void qemu_savevm_send_open_return_path(QEMUFile *f);
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/migration/trace-events b/migration/trace-events
index 1e353a317f..a180d7b008 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -86,7 +86,7 @@ migrate_fd_cleanup(void) ""
migrate_fd_error(const char *error_desc) "error=%s"
migrate_fd_cancel(void) ""
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
-migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
+migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
migration_completion_file_err(void) ""
migration_completion_postcopy_end(void) ""
@@ -233,3 +233,17 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags 0x%x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""